# Copyright 2005 by Michiel de Hoon. All rights reserved. # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Test the Blast XML parser.""" import io import os import unittest import numpy as np from Bio import Blast from Bio import StreamModeError from Bio.SeqRecord import SeqRecord class TestBlastp(unittest.TestCase): """Test the Blast XML parser for blastp output.""" def check_xml_2218_blastp_002_header(self, records): self.assertEqual(records.program, "blastp") self.assertEqual(records.version, "BLASTP 2.2.18+") self.assertEqual( records.reference, 'Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.', ) self.assertEqual(records.db, "gpipe/9606/Previous/protein") self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "gi|585505|sp|Q08386|MOPB_RHOCA") self.assertEqual( records.query.description, "Molybdenum-pterin-binding protein mopB >gi|310278|gb|AAA71913.1| molybdenum-pterin-binding protein", ) self.assertEqual(repr(records.query.seq), "Seq(None, length=270)") self.assertEqual(len(records.param), 5) self.assertEqual(records.param["matrix"], "BLOSUM62") self.assertAlmostEqual(records.param["expect"], 0.01) self.assertEqual(records.param["gap-open"], 11) self.assertEqual(records.param["gap-extend"], 1) self.assertEqual(records.param["filter"], "m L; R -d repeat/repeat_9606;") def check_xml_2218_blastp_002_record_0(self, record): self.assertEqual(record.num, 1) self.assertEqual( repr(record), "", ) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "gi|585505|sp|Q08386|MOPB_RHOCA") self.assertEqual( record.query.description, "Molybdenum-pterin-binding protein mopB >gi|310278|gb|AAA71913.1| molybdenum-pterin-binding protein", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=270)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 27252) self.assertEqual(record.stat["db-len"], 13958303) self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 0) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 0) def check_xml_2218_blastp_002_record_1(self, record): self.assertEqual(record.num, 2) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "gi|129628|sp|P07175.1|PARA_AGRTU") self.assertEqual(record.query.description, "Protein parA") self.assertEqual(repr(record.query.seq), "Seq(None, length=222)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 27252) self.assertEqual(record.stat["db-len"], 13958303) self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 0) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 0) def test_xml_2218_blastp_002_iterator(self): """Parsing BLASTP 2.2.18+ (xml_2218_blastp_002.xml) by iteration.""" filename = "xml_2218_blastp_002.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_2218_blastp_002_header(records) record = next(records) self.check_xml_2218_blastp_002_record_0(record) record = next(records) self.check_xml_2218_blastp_002_record_1(record) with open(path, "rb") as stream: records = Blast.parse(stream) records = records[:] self.check_xml_2218_blastp_002_header(records) record = next(records) self.check_xml_2218_blastp_002_record_0(record) record = next(records) self.check_xml_2218_blastp_002_record_1(record) self.assertRaises(StopIteration, next, records) with open(path) as stream: with self.assertRaises(StreamModeError) as cm: Blast.parse(stream) self.assertEqual( str(cm.exception), "BLAST output files must be opened in binary mode." ) def test_xml_2218_blastp_002_list(self): """Parsing BLASTP 2.2.18+ (xml_2218_blastp_002.xml) as a list.""" filename = "xml_2218_blastp_002.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_2218_blastp_002_header(records) record = records[0] # everything should have been read in by now self.check_xml_2218_blastp_002_record_0(record) record = records[1] self.check_xml_2218_blastp_002_record_1(record) # header should still be OK self.check_xml_2218_blastp_002_header(records) self.assertEqual(len(records), 2) self.assertEqual( str(records), """\ Program: BLASTP 2.2.18+ db: gpipe/9606/Previous/protein Query: gi|585505|sp|Q08386|MOPB_RHOCA (length=270) Molybdenum-pterin-binding protein mopB >gi|310278|gb|AAA71913.1| molybdenum-pterin-binding protein Hits: No hits found Query: gi|129628|sp|P07175.1|PARA_AGRTU (length=222) Protein parA Hits: No hits found""", ) # check if converting the records to a list does not lose the header: with open(path, "rb") as stream: records = Blast.parse(stream) records = records[:] self.check_xml_2218_blastp_002_header(records) def test_xml_2218_blastp_002_writer(self): """Writing BLASTP 2.2.18+ (xml_2218_blastp_002.xml).""" filename = "xml_2218_blastp_002.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) records = records[:] stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 2) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_2218_blastp_002_header(written_records) record = next(written_records) self.check_xml_2218_blastp_002_record_0(record) record = next(written_records) self.check_xml_2218_blastp_002_record_1(record) self.assertRaises(StopIteration, next, written_records) def test_xml_2218L_blastp_001_parser(self): """Parsing blastp 2.2.18 [Mar-02-2008] (xml_2218L_blastp_001.xml).""" filename = "xml_2218L_blastp_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_2218L_blastp_001_records(records) self.check_xml_2218L_blastp_001_str(records) with Blast.parse(path) as records: self.check_xml_2218L_blastp_001_records(records) self.check_xml_2218L_blastp_001_str(records) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_2218L_blastp_001_record(record) record = Blast.read(path) self.check_xml_2218L_blastp_001_record(record) def check_xml_2218L_blastp_001_str(self, records): self.assertEqual( str(records), """\ Program: blastp 2.2.18 [Mar-02-2008] db: /Users/pjcock/Downloads/Software/blast-2.2.18/data/nr Hits: No hits found""", ) def check_xml_2218L_blastp_001_records(self, records): self.assertEqual(records.program, "blastp") self.assertEqual(records.version, "blastp 2.2.18 [Mar-02-2008]") self.assertEqual( records.reference, '~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402.', ) self.assertEqual( records.db, "/Users/pjcock/Downloads/Software/blast-2.2.18/data/nr" ) self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "lcl|1_0") self.assertEqual(records.query.description, "Fake") self.assertEqual(repr(records.query.seq), "Seq(None, length=9)") self.assertEqual(len(records.param), 5) self.assertEqual(records.param["matrix"], "BLOSUM62") self.assertAlmostEqual(records.param["expect"], 1e-05) self.assertEqual(records.param["gap-open"], 11) self.assertEqual(records.param["gap-extend"], 1) self.assertEqual(records.param["filter"], "F") record = next(records) self.assertRaises(StopIteration, next, records) self.check_xml_2218L_blastp_001_record(record) def check_xml_2218L_blastp_001_record(self, record): self.assertEqual(record.num, 1) self.assertIsNone(record.query) self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 6589360) self.assertEqual(record.stat["db-len"], 2253133281) self.assertEqual(record.stat["hsp-len"], 0) self.assertAlmostEqual(record.stat["eff-space"], 2.02782e10) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 0) def test_xml_2218L_blastp_001_writer(self): """Writing blastp 2.2.18 [Mar-02-2008] (xml_2218L_blastp_001.xml).""" filename = "xml_2218L_blastp_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_2218L_blastp_001_records(written_records) def test_xml_2226_blastp_003(self): """Parsing BLASTP 2.2.26+ (xml_2226_blastp_003.xml).""" filename = "xml_2226_blastp_003.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_2226_blastp_003(records) with Blast.parse(path) as records: self.check_xml_2226_blastp_003(records) with Blast.parse(path) as records: self.assertEqual( str(records), """\ Program: BLASTP 2.2.26+ db: db/minirefseq_prot Query: Query_1 (length=102) gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168] Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 1 gnl|BL_ORD_ID|1 gi|308175296|ref|YP_003922001.1| membr... 1 1 gnl|BL_ORD_ID|2 gi|375363999|ref|YP_005132038.1| lytA ... 2 1 gnl|BL_ORD_ID|3 gi|154687679|ref|YP_001422840.1| LytA ... 3 1 gnl|BL_ORD_ID|4 gi|311070071|ref|YP_003974994.1| unnam... 4 1 gnl|BL_ORD_ID|15 gi|332258565|ref|XP_003278367.1| PRED...""", ) record = Blast.read(path) self.assertEqual( str(record), """\ Program: BLASTP 2.2.26+ db: db/minirefseq_prot Query: Query_1 (length=102) gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168] Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 1 gnl|BL_ORD_ID|1 gi|308175296|ref|YP_003922001.1| membr... 1 1 gnl|BL_ORD_ID|2 gi|375363999|ref|YP_005132038.1| lytA ... 2 1 gnl|BL_ORD_ID|3 gi|154687679|ref|YP_001422840.1| LytA ... 3 1 gnl|BL_ORD_ID|4 gi|311070071|ref|YP_003974994.1| unnam... 4 1 gnl|BL_ORD_ID|15 gi|332258565|ref|XP_003278367.1| PRED...""", ) def check_xml_2226_blastp_003(self, records): self.assertEqual(records.program, "blastp") self.assertEqual(records.version, "BLASTP 2.2.26+") self.assertEqual( records.reference, 'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.', ) self.assertEqual(records.db, "db/minirefseq_prot") self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "Query_1") self.assertEqual( records.query.description, "gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]", ) self.assertEqual(repr(records.query.seq), "Seq(None, length=102)") self.assertEqual(len(records.param), 5) self.assertEqual(records.param["matrix"], "BLOSUM62") self.assertAlmostEqual(records.param["expect"], 10.0) self.assertEqual(records.param["gap-open"], 11) self.assertEqual(records.param["gap-extend"], 1) self.assertEqual(records.param["filter"], "F") record = next(records) self.assertEqual(record.num, 1) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "Query_1") self.assertEqual( record.query.description, "gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=102)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 20) self.assertEqual(record.stat["db-len"], 6406) self.assertEqual(record.stat["hsp-len"], 38) self.assertEqual(record.stat["eff-space"], 361344) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 5) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gnl|BL_ORD_ID|1") self.assertEqual(hit.target.name, "1") self.assertEqual( hit.target.description, "gi|308175296|ref|YP_003922001.1| membrane bound lipoprotein [Bacillus amyloliquefaciens DSM 7]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=100)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 350.0) self.assertAlmostEqual(hsp.annotations["bit score"], 139.428) self.assertAlmostEqual(hsp.annotations["evalue"], 1.99275e-46, places=51) self.assertEqual(hsp.annotations["identity"], 69) self.assertEqual(hsp.annotations["positive"], 81) self.assertEqual(hsp.annotations["gaps"], 2) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 30, 30, 100], [ 0, 30, 32, 102]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 102)) self.assertEqual( repr(hsp.query.seq), "Seq('MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVT...RAN')", ) self.assertEqual(hsp.query.id, "Query_1") self.assertEqual( hsp.query.description, "gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MKKIFGCLFFILLLAGCGVTNEKSQGEDAGEKLVTKEGTYVGLADTHTIEVTVD...PAN')", ) self.assertEqual(hsp.target.id, "gnl|BL_ORD_ID|1") self.assertEqual(hsp.target.name, "1") self.assertEqual( hsp.target.description, "gi|308175296|ref|YP_003922001.1| membrane bound lipoprotein [Bacillus amyloliquefaciens DSM 7]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKK LFFILLL+GCGV ++KSQGED + TKEGTYVGLADTHTIEVTVD+EPVS DITEES D+ N+G+KVT+ Y+KN +GQL+LKDIE AN", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_1 Length: 102 Strand: Plus gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168] Target: gnl|BL_ORD_ID|1 Length: 100 Strand: Plus gi|308175296|ref|YP_003922001.1| membrane bound lipoprotein [Bacillus amyloliquefaciens DSM 7] Score:139 bits(350), Expect:2e-46, Identities:69/102(68%), Positives:81/102(79%), Gaps:2.102(2%) gnl|BL_OR 0 MKKIFGCLFFILLLAGCGVTNEKSQGEDAG--EKLVTKEGTYVGLADTHTIEVTVDHEPV 0 |||....|||||||.||||...||||||..--....||||||||||||||||||||.||| Query_1 0 MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPV gnl|BL_OR 58 SFDITEESADDVKNLNNGEKVTVKYQKNSKGQLVLKDIEPAN 100 60 |.||||||..|....|.|.|||..|.||..|||.|||||.|| 102 Query_1 60 SLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN 102 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gnl|BL_ORD_ID|2") self.assertEqual(hit.target.name, "2") self.assertEqual( hit.target.description, "gi|375363999|ref|YP_005132038.1| lytA gene product [Bacillus amyloliquefaciens subsp. plantarum CAU B946]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=105)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 219.0) self.assertAlmostEqual(hsp.annotations["bit score"], 88.9669) self.assertAlmostEqual(hsp.annotations["evalue"], 6.94052e-27, places=32) self.assertEqual(hsp.annotations["identity"], 48) self.assertEqual(hsp.annotations["positive"], 69) self.assertEqual(hsp.annotations["gaps"], 5) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 13, 17, 32, 32, 104], [ 0, 13, 13, 28, 29, 101]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 105)) self.assertEqual( repr(hsp.query.seq), "Seq({0: 'MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVT...ERA'}, length=102)", ) self.assertEqual(hsp.query.id, "Query_1") self.assertEqual( hsp.query.description, "gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MKKTIAASFLILLFSVVLAACGTAEQSKKGSGSSENQAQKETAYYVGMADTHTI...EKA'}, length=105)", ) self.assertEqual(hsp.target.id, "gnl|BL_ORD_ID|2") self.assertEqual(hsp.target.name, "2") self.assertEqual( hsp.target.description, "gi|375363999|ref|YP_005132038.1| lytA gene product [Bacillus amyloliquefaciens subsp. plantarum CAU B946]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKK IA F ILL L+ CG Q +G S ++ + + YVG+ADTHTIEV VD++PVS + +++ + L+KF+ DKV+ITY ND+GQ +K+IE+A", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_1 Length: 102 Strand: Plus gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168] Target: gnl|BL_ORD_ID|2 Length: 105 Strand: Plus gi|375363999|ref|YP_005132038.1| lytA gene product [Bacillus amyloliquefaciens subsp. plantarum CAU B946] Score:88 bits(219), Expect:7e-27, Identities:48/105(46%), Positives:69/105(66%), Gaps:5.105(5%) gnl|BL_OR 0 MKKTIAASFLILLFSVVLAACGTAEQSKKGSG-SSENQAQKETAYYVGMADTHTIEVKVD 0 |||.||..|.|||----|..||...|...|..-|...........|||.||||||||.|| Query_1 0 MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVD gnl|BL_OR 59 DQPVSFEFSDDFSDVLNKFSENDKVSITYFTNDKGQKEIKEIEKA 104 60 ..|||..........|.||...|||.|||..||.||...|.||.| 105 Query_1 56 NEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERA 101 """, ) hit = record[2] self.assertEqual(hit.num, 3) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gnl|BL_ORD_ID|3") self.assertEqual(hit.target.name, "3") self.assertEqual( hit.target.description, "gi|154687679|ref|YP_001422840.1| LytA [Bacillus amyloliquefaciens FZB42]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=105)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 219.0) self.assertAlmostEqual(hsp.annotations["bit score"], 88.9669) self.assertAlmostEqual(hsp.annotations["evalue"], 8.41012e-27, places=32) self.assertEqual(hsp.annotations["identity"], 48) self.assertEqual(hsp.annotations["positive"], 69) self.assertEqual(hsp.annotations["gaps"], 5) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 13, 17, 32, 32, 104], [ 0, 13, 13, 28, 29, 101]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 105)) self.assertEqual( repr(hsp.query.seq), "Seq({0: 'MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVT...ERA'}, length=102)", ) self.assertEqual(hsp.query.id, "Query_1") self.assertEqual( hsp.query.description, "gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MKKTIAASFLILLFSVVLAACGTADQSKKGSGSSENQAQKETAYYVGMADTHTI...EKA'}, length=105)", ) self.assertEqual(hsp.target.id, "gnl|BL_ORD_ID|3") self.assertEqual(hsp.target.name, "3") self.assertEqual( hsp.target.description, "gi|154687679|ref|YP_001422840.1| LytA [Bacillus amyloliquefaciens FZB42]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKK IA F ILL L+ CG Q +G S ++ + + YVG+ADTHTIEV VD++PVS + +++ + L+KF+ DKV+ITY ND+GQ +K+IE+A", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_1 Length: 102 Strand: Plus gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168] Target: gnl|BL_ORD_ID|3 Length: 105 Strand: Plus gi|154687679|ref|YP_001422840.1| LytA [Bacillus amyloliquefaciens FZB42] Score:88 bits(219), Expect:8e-27, Identities:48/105(46%), Positives:69/105(66%), Gaps:5.105(5%) gnl|BL_OR 0 MKKTIAASFLILLFSVVLAACGTADQSKKGSG-SSENQAQKETAYYVGMADTHTIEVKVD 0 |||.||..|.|||----|..||...|...|..-|...........|||.||||||||.|| Query_1 0 MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVD gnl|BL_OR 59 DQPVSFEFSDDFSDVLNKFSENDKVSITYFTNDKGQKEIKEIEKA 104 60 ..|||..........|.||...|||.|||..||.||...|.||.| 105 Query_1 56 NEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERA 101 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gnl|BL_ORD_ID|4") self.assertEqual(hit.target.name, "4") self.assertEqual( hit.target.description, "gi|311070071|ref|YP_003974994.1| unnamed protein product [Bacillus atrophaeus 1942]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=105)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 204.0) self.assertAlmostEqual(hsp.annotations["bit score"], 83.1889) self.assertAlmostEqual(hsp.annotations["evalue"], 1.37847e-24, places=29) self.assertEqual(hsp.annotations["identity"], 45) self.assertEqual(hsp.annotations["positive"], 66) self.assertEqual(hsp.annotations["gaps"], 5) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 13, 17, 30, 30, 103], [ 0, 13, 13, 26, 27, 100]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 104)) self.assertEqual( repr(hsp.query.seq), "Seq({0: 'MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVT...IER'}, length=102)", ) self.assertEqual(hsp.query.id, "Query_1") self.assertEqual( hsp.query.description, "gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MKKNVASSFLILLFSIILAACGTAEQSKEGNGSSSSQVQNETAYYVGMADTHTI...IEK'}, length=105)", ) self.assertEqual(hsp.target.id, "gnl|BL_ORD_ID|4") self.assertEqual(hsp.target.name, "4") self.assertEqual( hsp.target.description, "gi|311070071|ref|YP_003974994.1| unnamed protein product [Bacillus atrophaeus 1942]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKK +A F ILL L+ CG Q +G + S S ++ + YVG+ADTHTIEV +D++PVS + T++ + L++F DKV I+Y ND+GQ L +IE+", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_1 Length: 102 Strand: Plus gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168] Target: gnl|BL_ORD_ID|4 Length: 105 Strand: Plus gi|311070071|ref|YP_003974994.1| unnamed protein product [Bacillus atrophaeus 1942] Score:83 bits(204), Expect:1e-24, Identities:45/104(43%), Positives:66/104(63%), Gaps:5.104(5%) gnl|BL_OR 0 MKKNVASSFLILLFSIILAACGTAEQSKEG-NGSSSSQVQNETAYYVGMADTHTIEVKID 0 |||..|..|.|||----|..||...|...|-..|..|........|||.||||||||..| Query_1 0 MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVD gnl|BL_OR 59 DQPVSFEFTDDFSEILNEFEENDKVNISYLTNDKGQKELTEIEK 103 60 ..|||...|......|..|...|||.|.|..||.||..|..||. 104 Query_1 56 NEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIER 100 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gnl|BL_ORD_ID|15") self.assertEqual(hit.target.name, "15") self.assertEqual( hit.target.description, "gi|332258565|ref|XP_003278367.1| PREDICTED: UPF0764 protein C16orf89-like [Nomascus leucogenys]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=132)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 29.0) self.assertAlmostEqual(hsp.annotations["bit score"], 15.779) self.assertAlmostEqual(hsp.annotations["evalue"], 7.12269) self.assertEqual(hsp.annotations["identity"], 7) self.assertEqual(hsp.annotations["positive"], 11) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 79, 104], [ 59, 84]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 25)) self.assertEqual( repr(hsp.query.seq), "Seq({59: 'VSLDITEESTSDLDKFNSGDKVTIT'}, length=102)" ) self.assertEqual(hsp.query.id, "Query_1") self.assertEqual( hsp.query.description, "gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({79: 'VEMGFLHVGQAGLELVTSGDPPTLT'}, length=132)" ) self.assertEqual(hsp.target.id, "gnl|BL_ORD_ID|15") self.assertEqual(hsp.target.name, "15") self.assertEqual( hsp.target.description, "gi|332258565|ref|XP_003278367.1| PREDICTED: UPF0764 protein C16orf89-like [Nomascus leucogenys]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "V + + L+ SGD T+T") self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_1 Length: 102 Strand: Plus gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168] Target: gnl|BL_ORD_ID|15 Length: 132 Strand: Plus gi|332258565|ref|XP_003278367.1| PREDICTED: UPF0764 protein C16orf89-like [Nomascus leucogenys] Score:15 bits(29), Expect:7, Identities:7/25(28%), Positives:11/25(44%), Gaps:0.25(0%) gnl|BL_OR 79 VEMGFLHVGQAGLELVTSGDPPTLT 104 0 |...........|....|||..|.| 25 Query_1 59 VSLDITEESTSDLDKFNSGDKVTIT 84 """, ) with self.assertRaises(IndexError) as cm: record[5] self.assertEqual(str(cm.exception), "index out of range") with self.assertRaises(TypeError) as cm: record[None] self.assertEqual(str(cm.exception), "key must be an integer, slice, or str") with self.assertRaises(KeyError) as cm: record["weird_key"] self.assertEqual(str(cm.exception), "'weird_key'") target_id = "gnl|BL_ORD_ID|4" self.assertIn(target_id, record) self.assertNotIn("weird_id", record) self.assertEqual(record[target_id].target.id, target_id) self.assertEqual(record.index(target_id), 3) with self.assertRaises(ValueError) as cm: record.index("weird_id") self.assertEqual(str(cm.exception), "'weird_id' not found") self.assertEqual( repr(hit), "", ) self.assertEqual( repr(hit[:0]), "" ) self.assertEqual( record.keys(), [ "gnl|BL_ORD_ID|1", "gnl|BL_ORD_ID|2", "gnl|BL_ORD_ID|3", "gnl|BL_ORD_ID|4", "gnl|BL_ORD_ID|15", ], ) def test_xml_2226_blastp_003_writer(self): """Writing BLASTP 2.2.26+ (xml_2226_blastp_003.xml).""" filename = "xml_2226_blastp_003.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_2226_blastp_003(written_records) def test_phiblast_parser(self): """Parsing BLASTP 2.14.1+ (phiblast.xml).""" filename = "phiblast.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_phiblast_records(records) with Blast.parse(path) as records: self.check_phiblast_records(records) with open(path, "rb") as stream: record = Blast.read(stream) self.check_phiblast_record(record) record = Blast.read(path) self.check_phiblast_record(record) def check_phiblast_records(self, records): self.assertEqual(records.program, "blastp") self.assertEqual(records.version, "BLASTP 2.14.1+") self.assertEqual( records.reference, 'Zheng Zhang, Alejandro A. Schäffer, Webb Miller, Thomas L. Madden, David J. Lipman, Eugene V. Koonin, and Stephen F. Altschul (1998), "Protein sequence similarity searches using patterns as seeds", Nucleic Acids Res. 26:3986-3990.', ) self.assertEqual(records.db, "nr") self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "Query_74414") self.assertEqual(records.query.description, "unnamed protein product") self.assertEqual(repr(records.query.seq), "Seq(None, length=664)") self.assertEqual(len(records.param), 6) self.assertEqual(records.param["matrix"], "BLOSUM62") self.assertAlmostEqual(records.param["expect"], 0.05) self.assertEqual(records.param["gap-open"], 11) self.assertEqual(records.param["gap-extend"], 1) self.assertEqual(records.param["filter"], "F") self.assertEqual( records.param["pattern"], "[LIVMF]-G-E-x-[GAS]-[LIVM]-x(5,11)-R-[STAQ]-A-x-[LIVMA]-x-[STACV]", ) record = next(records) self.assertEqual(record.num, 1) self.assertRaises(StopIteration, next, records) self.check_phiblast_record(record) def check_phiblast_record(self, record): self.assertEqual(record.num, 1) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "Query_74414") self.assertEqual(record.query.description, "unnamed protein product") self.assertEqual(repr(record.query.seq), "Seq(None, length=664)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 633473216) self.assertEqual(record.stat["db-len"], 248084082182) self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 0) self.assertAlmostEqual(record.stat["kappa"], 0.047) self.assertAlmostEqual(record.stat["lambda"], 0.27) self.assertAlmostEqual(record.stat["entropy"], 1.0) self.assertEqual(len(record), 10) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|NP_001075863.1|") self.assertEqual(hit.target.name, "NP_001075863") self.assertEqual( hit.target.description, "cyclic nucleotide-gated olfactory channel [Oryctolagus cuniculus] >emb|CAA42201.1| aorta CNG channel (rACNG) [Oryctolagus cuniculus] >prf||1919268A cyclic nucleotide-gated channel [Oryctolagus cuniculus]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=732)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 3336.0) self.assertAlmostEqual(hsp.annotations["bit score"], 1290.65) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 664) self.assertEqual(hsp.annotations["positive"], 664) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 68, 732], [ 0, 664]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 664)) self.assertEqual( repr(hsp.query.seq), "Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')", ) self.assertEqual(hsp.query.id, "Query_74414") self.assertEqual(hsp.query.description, "unnamed protein product") self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({68: 'MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP'}, length=732)", ) self.assertEqual(hsp.target.id, "ref|NP_001075863.1|") self.assertEqual(hsp.target.name, "NP_001075863") self.assertEqual( hsp.target.description, "cyclic nucleotide-gated olfactory channel [Oryctolagus cuniculus] >emb|CAA42201.1| aorta CNG channel (rACNG) [Oryctolagus cuniculus] >prf||1919268A cyclic nucleotide-gated channel [Oryctolagus cuniculus]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQQRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGDGKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDYFSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIHNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKKTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKLKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAAAEQP", ) self.assertEqual( str(hsp), """\ Query : Query_74414 Length: 664 Strand: Plus unnamed protein product Target: ref|NP_001075863.1| Length: 732 Strand: Plus cyclic nucleotide-gated olfactory channel [Oryctolagus cuniculus] >emb|CAA42201.1| aorta CNG channel (rACNG) [Oryctolagus cuniculus] >prf||1919268A cyclic nucleotide-gated channel [Oryctolagus cuniculus] Score:1290 bits(3336), Expect:0, Identities:664/664(100%), Positives:664/664(100%), Gaps:0.664(0%) ref|NP_00 68 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ ref|NP_00 128 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD ref|NP_00 188 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY ref|NP_00 248 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI 180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI ref|NP_00 308 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI 240 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI ref|NP_00 368 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG ref|NP_00 428 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK ref|NP_00 488 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY 420 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY ref|NP_00 548 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI 480 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI ref|NP_00 608 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK 540 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK ref|NP_00 668 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA 600 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA ref|NP_00 728 AEQP 732 660 |||| 664 Query_744 660 AEQP 664 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|XP_051689802.1|") self.assertEqual(hit.target.name, "XP_051689802") self.assertEqual( hit.target.description, "cyclic nucleotide-gated olfactory channel isoform X3 [Oryctolagus cuniculus] >sp|Q28718.1| RecName: Full=Cyclic nucleotide-gated olfactory channel; AltName: Full=Aorta CNG channel; Short=RACNG; AltName: Full=Cyclic nucleotide-gated cation channel 2; AltName: Full=Cyclic nucleotide-gated channel alpha-2; Short=CNG channel alpha-2; Short=CNG-2; Short=CNG2 [Oryctolagus cuniculus]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 3336.0) self.assertAlmostEqual(hsp.annotations["bit score"], 1290.65) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 664) self.assertEqual(hsp.annotations["positive"], 664) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 664], [ 0, 664]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 664)) self.assertEqual( repr(hsp.query.seq), "Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')", ) self.assertEqual(hsp.query.id, "Query_74414") self.assertEqual(hsp.query.description, "unnamed protein product") self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')", ) self.assertEqual(hsp.target.id, "ref|XP_051689802.1|") self.assertEqual(hsp.target.name, "XP_051689802") self.assertEqual( hsp.target.description, "cyclic nucleotide-gated olfactory channel isoform X3 [Oryctolagus cuniculus] >sp|Q28718.1| RecName: Full=Cyclic nucleotide-gated olfactory channel; AltName: Full=Aorta CNG channel; Short=RACNG; AltName: Full=Cyclic nucleotide-gated cation channel 2; AltName: Full=Cyclic nucleotide-gated channel alpha-2; Short=CNG channel alpha-2; Short=CNG-2; Short=CNG2 [Oryctolagus cuniculus]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQQRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGDGKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDYFSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIHNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKKTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKLKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAAAEQP", ) self.assertEqual( str(hsp), """\ Query : Query_74414 Length: 664 Strand: Plus unnamed protein product Target: ref|XP_051689802.1| Length: 664 Strand: Plus cyclic nucleotide-gated olfactory channel isoform X3 [Oryctolagus cuniculus] >sp|Q28718.1| RecName: Full=Cyclic nucleotide-gated olfactory channel; AltName: Full=Aorta CNG channel; Short=RACNG; AltName: Full=Cyclic nucleotide-gated cation channel 2; AltName: Full=Cyclic nucleotide-gated channel alpha-2; Short=CNG channel alpha-2; Short=CNG-2; Short=CNG2 [Oryctolagus cuniculus] Score:1290 bits(3336), Expect:0, Identities:664/664(100%), Positives:664/664(100%), Gaps:0.664(0%) ref|XP_05 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ ref|XP_05 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD ref|XP_05 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY ref|XP_05 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI 180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI ref|XP_05 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI 240 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI ref|XP_05 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG ref|XP_05 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK ref|XP_05 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY 420 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY ref|XP_05 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI 480 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI ref|XP_05 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK 540 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK ref|XP_05 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA 600 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA ref|XP_05 660 AEQP 664 660 |||| 664 Query_744 660 AEQP 664 """, ) hit = record[2] self.assertEqual(hit.num, 3) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|XP_017206345.1|") self.assertEqual(hit.target.name, "XP_017206345") self.assertEqual( hit.target.description, "cyclic nucleotide-gated olfactory channel isoform X2 [Oryctolagus cuniculus]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=677)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 3336.0) self.assertAlmostEqual(hsp.annotations["bit score"], 1290.65) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 664) self.assertEqual(hsp.annotations["positive"], 664) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 13, 677], [ 0, 664]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 664)) self.assertEqual( repr(hsp.query.seq), "Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')", ) self.assertEqual(hsp.query.id, "Query_74414") self.assertEqual(hsp.query.description, "unnamed protein product") self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({13: 'MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP'}, length=677)", ) self.assertEqual(hsp.target.id, "ref|XP_017206345.1|") self.assertEqual(hsp.target.name, "XP_017206345") self.assertEqual( hsp.target.description, "cyclic nucleotide-gated olfactory channel isoform X2 [Oryctolagus cuniculus]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQQRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGDGKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDYFSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIHNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKKTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKLKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAAAEQP", ) self.assertEqual( str(hsp), """\ Query : Query_74414 Length: 664 Strand: Plus unnamed protein product Target: ref|XP_017206345.1| Length: 677 Strand: Plus cyclic nucleotide-gated olfactory channel isoform X2 [Oryctolagus cuniculus] Score:1290 bits(3336), Expect:0, Identities:664/664(100%), Positives:664/664(100%), Gaps:0.664(0%) ref|XP_01 13 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ ref|XP_01 73 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD ref|XP_01 133 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY ref|XP_01 193 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI 180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI ref|XP_01 253 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI 240 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI ref|XP_01 313 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG ref|XP_01 373 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK ref|XP_01 433 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY 420 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY ref|XP_01 493 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI 480 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI ref|XP_01 553 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK 540 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK ref|XP_01 613 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA 600 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA ref|XP_01 673 AEQP 677 660 |||| 664 Query_744 660 AEQP 664 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|XP_051689801.1|") self.assertEqual(hit.target.name, "XP_051689801") self.assertEqual( hit.target.description, "cyclic nucleotide-gated olfactory channel isoform X1 [Oryctolagus cuniculus]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=687)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 3336.0) self.assertAlmostEqual(hsp.annotations["bit score"], 1290.65) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 664) self.assertEqual(hsp.annotations["positive"], 664) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 23, 687], [ 0, 664]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 664)) self.assertEqual( repr(hsp.query.seq), "Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')", ) self.assertEqual(hsp.query.id, "Query_74414") self.assertEqual(hsp.query.description, "unnamed protein product") self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({23: 'MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP'}, length=687)", ) self.assertEqual(hsp.target.id, "ref|XP_051689801.1|") self.assertEqual(hsp.target.name, "XP_051689801") self.assertEqual( hsp.target.description, "cyclic nucleotide-gated olfactory channel isoform X1 [Oryctolagus cuniculus]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQQRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGDGKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDYFSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIHNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKKTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKLKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAAAEQP", ) self.assertEqual( str(hsp), """\ Query : Query_74414 Length: 664 Strand: Plus unnamed protein product Target: ref|XP_051689801.1| Length: 687 Strand: Plus cyclic nucleotide-gated olfactory channel isoform X1 [Oryctolagus cuniculus] Score:1290 bits(3336), Expect:0, Identities:664/664(100%), Positives:664/664(100%), Gaps:0.664(0%) ref|XP_05 23 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ ref|XP_05 83 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD ref|XP_05 143 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY ref|XP_05 203 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI 180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI ref|XP_05 263 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI 240 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI ref|XP_05 323 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG ref|XP_05 383 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK ref|XP_05 443 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY 420 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY ref|XP_05 503 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI 480 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI ref|XP_05 563 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK 540 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK ref|XP_05 623 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA 600 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA ref|XP_05 683 AEQP 687 660 |||| 664 Query_744 660 AEQP 664 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|XP_004407164.1|") self.assertEqual(hit.target.name, "XP_004407164") self.assertEqual( hit.target.description, "PREDICTED: cyclic nucleotide-gated olfactory channel [Odobenus rosmarus divergens]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 3231.0) self.assertAlmostEqual(hsp.annotations["bit score"], 1249.79) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 639) self.assertEqual(hsp.annotations["positive"], 652) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 664], [ 0, 664]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 664)) self.assertEqual( repr(hsp.query.seq), "Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')", ) self.assertEqual(hsp.query.id, "Query_74414") self.assertEqual(hsp.query.description, "unnamed protein product") self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTEKSNGVKSSPANNHNHHTPPAIKANGKDDHRTNSRPQSAADDDTSSELQRLA...DEP')", ) self.assertEqual(hsp.target.id, "ref|XP_004407164.1|") self.assertEqual(hsp.target.name, "XP_004407164") self.assertEqual( hsp.target.description, "PREDICTED: cyclic nucleotide-gated olfactory channel [Odobenus rosmarus divergens]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MTEKSNGVKSSPANNHN+H P IKANGKD+ RT SRPQSAADDDTSSELQRLAEMDAPQQ RGGFRRIVRLVG+IR+WAN+NFREEE RPDSFLERFRGPELQTVTTQQGDGKGDKDG+GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGY+LVWLVLDYFSDVVYI DLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIH+PELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKK+VDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKL+QLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLE KMKQN DDYLSDG+NSPEP AA++P", ) self.assertEqual( str(hsp), """\ Query : Query_74414 Length: 664 Strand: Plus unnamed protein product Target: ref|XP_004407164.1| Length: 664 Strand: Plus PREDICTED: cyclic nucleotide-gated olfactory channel [Odobenus rosmarus divergens] Score:1249 bits(3231), Expect:0, Identities:639/664(96%), Positives:652/664(98%), Gaps:0.664(0%) ref|XP_00 0 MTEKSNGVKSSPANNHNHHTPPAIKANGKDDHRTNSRPQSAADDDTSSELQRLAEMDAPQ 0 |||||||||||||||||.|.|..|||||||..||.||||||||||||||||||||||||| Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ ref|XP_00 60 QGRGGFRRIVRLVGIIREWANKNFREEEPRPDSFLERFRGPELQTVTTQQGDGKGDKDGE 60 |.||||||||||||.||.|||.||||||.||||||||||||||||||||||||||||||. Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD ref|XP_00 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYYLVWLVLDY 120 |||||||||||||||||||||||||||||||||||||||||||||||||||.|||||||| Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY ref|XP_00 180 FSDVVYITDLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI 180 |||||||.|||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI ref|XP_00 240 HSPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI 240 |.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI ref|XP_00 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG ref|XP_00 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK ref|XP_00 420 KSVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY 420 |.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY ref|XP_00 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI 480 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI ref|XP_00 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK 540 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK ref|XP_00 600 LEQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLETKMKQNNMDDYLSDGVNSPEPTA 600 |.|||||||||||||||||||||||||||||||||||.|||||..|||||||.|||||.| Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA ref|XP_00 660 ADEP 664 660 |..| 664 Query_744 660 AEQP 664 """, ) hit = record[5] self.assertEqual(hit.num, 6) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|XP_008688471.1|") self.assertEqual(hit.target.name, "XP_008688471") self.assertEqual( hit.target.description, "cyclic nucleotide-gated olfactory channel [Ursus maritimus] >ref|XP_026343324.1| cyclic nucleotide-gated olfactory channel [Ursus arctos]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 3228.0) self.assertAlmostEqual(hsp.annotations["bit score"], 1248.63) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 638) self.assertEqual(hsp.annotations["positive"], 652) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 664], [ 0, 664]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 664)) self.assertEqual( repr(hsp.query.seq), "Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')", ) self.assertEqual(hsp.query.id, "Query_74414") self.assertEqual(hsp.query.description, "unnamed protein product") self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTEKSNGVKSSPANNHNHHAPPAIKANGKDDHRSSSRPQSAVDDDTSSELQRLA...DEP')", ) self.assertEqual(hsp.target.id, "ref|XP_008688471.1|") self.assertEqual(hsp.target.name, "XP_008688471") self.assertEqual( hsp.target.description, "cyclic nucleotide-gated olfactory channel [Ursus maritimus] >ref|XP_026343324.1| cyclic nucleotide-gated olfactory channel [Ursus arctos]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MTEKSNGVKSSPANNHN+H P IKANGKD+ R+ SRPQSA DDDTSSELQRLAEMDAPQ+ RGGFRRIVRLVG+IR WAN+NFREEE RPDSFLERFRGPELQTVTTQQGDGKGDKDG+GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQ+GY+LVWLVLDYFSDVVYI DLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIH+PELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKK+VDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKL+QLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLE KMKQN EDDYLSDGMNSPEPAAA++P", ) self.assertEqual( str(hsp), """\ Query : Query_74414 Length: 664 Strand: Plus unnamed protein product Target: ref|XP_008688471.1| Length: 664 Strand: Plus cyclic nucleotide-gated olfactory channel [Ursus maritimus] >ref|XP_026343324.1| cyclic nucleotide-gated olfactory channel [Ursus arctos] Score:1248 bits(3228), Expect:0, Identities:638/664(96%), Positives:652/664(98%), Gaps:0.664(0%) ref|XP_00 0 MTEKSNGVKSSPANNHNHHAPPAIKANGKDDHRSSSRPQSAVDDDTSSELQRLAEMDAPQ 0 |||||||||||||||||.|.|..|||||||..|..||||||.|||||||||||||||||| Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ ref|XP_00 60 RGRGGFRRIVRLVGIIRDWANKNFREEEPRPDSFLERFRGPELQTVTTQQGDGKGDKDGE 60 ..||||||||||||.||.|||.||||||.||||||||||||||||||||||||||||||. Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD ref|XP_00 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQKGYYLVWLVLDY 120 ||||||||||||||||||||||||||||||||||||||||||||||||.||.|||||||| Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY ref|XP_00 180 FSDVVYITDLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI 180 |||||||.|||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI ref|XP_00 240 HSPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI 240 |.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI ref|XP_00 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG ref|XP_00 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK ref|XP_00 420 KSVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY 420 |.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY ref|XP_00 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI 480 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI ref|XP_00 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK 540 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK ref|XP_00 600 LEQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLETKMKQNNEDDYLSDGMNSPEPAA 600 |.|||||||||||||||||||||||||||||||||||.|||||.|||||||||||||||| Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA ref|XP_00 660 ADEP 664 660 |..| 664 Query_744 660 AEQP 664 """, ) hit = record[6] self.assertEqual(hit.num, 7) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|XP_011229794.1|") self.assertEqual(hit.target.name, "XP_011229794") self.assertEqual( hit.target.description, "cyclic nucleotide-gated olfactory channel [Ailuropoda melanoleuca] >gb|EFB14215.1| hypothetical protein PANDA_013994, partial [Ailuropoda melanoleuca]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 3227.0) self.assertAlmostEqual(hsp.annotations["bit score"], 1248.24) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 638) self.assertEqual(hsp.annotations["positive"], 652) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 664], [ 0, 664]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 664)) self.assertEqual( repr(hsp.query.seq), "Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')", ) self.assertEqual(hsp.query.id, "Query_74414") self.assertEqual(hsp.query.description, "unnamed protein product") self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTEKSNGVKSSPANNHNHHAPPAIKANGKDDHRSSSRPQSAVDDDTSSELQRLA...DEP')", ) self.assertEqual(hsp.target.id, "ref|XP_011229794.1|") self.assertEqual(hsp.target.name, "XP_011229794") self.assertEqual( hsp.target.description, "cyclic nucleotide-gated olfactory channel [Ailuropoda melanoleuca] >gb|EFB14215.1| hypothetical protein PANDA_013994, partial [Ailuropoda melanoleuca]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MTEKSNGVKSSPANNHN+H P IKANGKD+ R+ SRPQSA DDDTSSELQRLAEMDAPQ+ RGGFRRIVRLVG+IR WAN+NFREEE RPDSFLERFRGPELQTVTTQQGDGKGDKDG+GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQ+GY+LVWLVLDYFSDVVYI DLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIH+PELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKK+VDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKL+QLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLE KMKQN EDDYLSDGMNSPEPAAA++P", ) self.assertEqual( str(hsp), """\ Query : Query_74414 Length: 664 Strand: Plus unnamed protein product Target: ref|XP_011229794.1| Length: 664 Strand: Plus cyclic nucleotide-gated olfactory channel [Ailuropoda melanoleuca] >gb|EFB14215.1| hypothetical protein PANDA_013994, partial [Ailuropoda melanoleuca] Score:1248 bits(3227), Expect:0, Identities:638/664(96%), Positives:652/664(98%), Gaps:0.664(0%) ref|XP_01 0 MTEKSNGVKSSPANNHNHHAPPAIKANGKDDHRSSSRPQSAVDDDTSSELQRLAEMDAPQ 0 |||||||||||||||||.|.|..|||||||..|..||||||.|||||||||||||||||| Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ ref|XP_01 60 RGRGGFRRIVRLVGIIRDWANKNFREEEPRPDSFLERFRGPELQTVTTQQGDGKGDKDGE 60 ..||||||||||||.||.|||.||||||.||||||||||||||||||||||||||||||. Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD ref|XP_01 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQKGYYLVWLVLDY 120 ||||||||||||||||||||||||||||||||||||||||||||||||.||.|||||||| Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY ref|XP_01 180 FSDVVYIIDLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI 180 |||||||.|||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI ref|XP_01 240 HSPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI 240 |.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI ref|XP_01 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG ref|XP_01 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK ref|XP_01 420 KSVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY 420 |.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY ref|XP_01 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI 480 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI ref|XP_01 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK 540 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK ref|XP_01 600 LEQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLETKMKQNNEDDYLSDGMNSPEPAA 600 |.|||||||||||||||||||||||||||||||||||.|||||.|||||||||||||||| Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA ref|XP_01 660 ADEP 664 660 |..| 664 Query_744 660 AEQP 664 """, ) hit = record[7] self.assertEqual(hit.num, 8) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|XP_045646452.1|") self.assertEqual(hit.target.name, "XP_045646452") self.assertEqual( hit.target.description, "cyclic nucleotide-gated olfactory channel [Ursus americanus]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 3223.0) self.assertAlmostEqual(hsp.annotations["bit score"], 1246.68) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 637) self.assertEqual(hsp.annotations["positive"], 651) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 664], [ 0, 664]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 664)) self.assertEqual( repr(hsp.query.seq), "Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')", ) self.assertEqual(hsp.query.id, "Query_74414") self.assertEqual(hsp.query.description, "unnamed protein product") self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTEKSNGVKCSPANNHNHHAPPAIKANGKDDHRSSSRPQSAVDDDTSSELQRLA...DEP')", ) self.assertEqual(hsp.target.id, "ref|XP_045646452.1|") self.assertEqual(hsp.target.name, "XP_045646452") self.assertEqual( hsp.target.description, "cyclic nucleotide-gated olfactory channel [Ursus americanus]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MTEKSNGVK SPANNHN+H P IKANGKD+ R+ SRPQSA DDDTSSELQRLAEMDAPQ+ RGGFRRIVRLVG+IR WAN+NFREEE RPDSFLERFRGPELQTVTTQQGDGKGDKDG+GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQ+GY+LVWLVLDYFSDVVYI DLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIH+PELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKK+VDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKL+QLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLE KMKQN EDDYLSDGMNSPEPAAA++P", ) self.assertEqual( str(hsp), """\ Query : Query_74414 Length: 664 Strand: Plus unnamed protein product Target: ref|XP_045646452.1| Length: 664 Strand: Plus cyclic nucleotide-gated olfactory channel [Ursus americanus] Score:1246 bits(3223), Expect:0, Identities:637/664(96%), Positives:651/664(98%), Gaps:0.664(0%) ref|XP_04 0 MTEKSNGVKCSPANNHNHHAPPAIKANGKDDHRSSSRPQSAVDDDTSSELQRLAEMDAPQ 0 |||||||||.|||||||.|.|..|||||||..|..||||||.|||||||||||||||||| Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ ref|XP_04 60 RGRGGFRRIVRLVGIIRDWANKNFREEEPRPDSFLERFRGPELQTVTTQQGDGKGDKDGE 60 ..||||||||||||.||.|||.||||||.||||||||||||||||||||||||||||||. Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD ref|XP_04 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQKGYYLVWLVLDY 120 ||||||||||||||||||||||||||||||||||||||||||||||||.||.|||||||| Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY ref|XP_04 180 FSDVVYITDLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI 180 |||||||.|||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI ref|XP_04 240 HSPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI 240 |.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI ref|XP_04 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG ref|XP_04 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK ref|XP_04 420 KSVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY 420 |.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY ref|XP_04 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI 480 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI ref|XP_04 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK 540 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK ref|XP_04 600 LEQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLETKMKQNNEDDYLSDGMNSPEPAA 600 |.|||||||||||||||||||||||||||||||||||.|||||.|||||||||||||||| Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA ref|XP_04 660 ADEP 664 660 |..| 664 Query_744 660 AEQP 664 """, ) hit = record[8] self.assertEqual(hit.num, 9) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|XP_035942617.1|") self.assertEqual(hit.target.name, "XP_035942617") self.assertEqual( hit.target.description, "cyclic nucleotide-gated olfactory channel [Halichoerus grypus]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 3221.0) self.assertAlmostEqual(hsp.annotations["bit score"], 1245.9) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 638) self.assertEqual(hsp.annotations["positive"], 651) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 664], [ 0, 664]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 664)) self.assertEqual( repr(hsp.query.seq), "Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')", ) self.assertEqual(hsp.query.id, "Query_74414") self.assertEqual(hsp.query.description, "unnamed protein product") self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTEKSNGVKSSPANNHNHHAPPVIKANGKDDHRTSSRPQSAADDDTSSELQRLA...DEP')", ) self.assertEqual(hsp.target.id, "ref|XP_035942617.1|") self.assertEqual(hsp.target.name, "XP_035942617") self.assertEqual( hsp.target.description, "cyclic nucleotide-gated olfactory channel [Halichoerus grypus]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MTEKSNGVKSSPANNHN+H P IKANGKD+ RT SRPQSAADDDTSSELQRLAEMD PQQ RGGFRRIVRLVG+IR+WAN+NFREEE RPDSFLERFRGPELQTVTTQQGDGKGDKDG+GKGTKKKFELFVLDPAGDWYYRWLFVIAM VLYNWCLLVARACFSDLQ+GY+LVWLVLDYFSDVVYI DLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIH+PELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKK+VDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKL+QLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLE KMKQN DDYLSDGMNSPEPAAA++P", ) self.assertEqual( str(hsp), """\ Query : Query_74414 Length: 664 Strand: Plus unnamed protein product Target: ref|XP_035942617.1| Length: 664 Strand: Plus cyclic nucleotide-gated olfactory channel [Halichoerus grypus] Score:1245 bits(3221), Expect:0, Identities:638/664(96%), Positives:651/664(98%), Gaps:0.664(0%) ref|XP_03 0 MTEKSNGVKSSPANNHNHHAPPVIKANGKDDHRTSSRPQSAADDDTSSELQRLAEMDVPQ 0 |||||||||||||||||.|.|..|||||||..||.||||||||||||||||||||||.|| Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ ref|XP_03 60 QGRGGFRRIVRLVGIIREWANKNFREEELRPDSFLERFRGPELQTVTTQQGDGKGDKDGE 60 |.||||||||||||.||.|||.||||||.||||||||||||||||||||||||||||||. Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD ref|XP_03 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMLVLYNWCLLVARACFSDLQKGYYLVWLVLDY 120 |||||||||||||||||||||||||||||.||||||||||||||||||.||.|||||||| Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY ref|XP_03 180 FSDVVYITDLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI 180 |||||||.|||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI ref|XP_03 240 HSPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI 240 |.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI ref|XP_03 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG ref|XP_03 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK ref|XP_03 420 KSVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY 420 |.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY ref|XP_03 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI 480 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI ref|XP_03 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK 540 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK ref|XP_03 600 LEQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLETKMKQNNMDDYLSDGMNSPEPAA 600 |.|||||||||||||||||||||||||||||||||||.|||||..||||||||||||||| Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA ref|XP_03 660 ADEP 664 660 |..| 664 Query_744 660 AEQP 664 """, ) hit = record[9] self.assertEqual(hit.num, 10) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|XP_049729369.1|") self.assertEqual(hit.target.name, "XP_049729369") self.assertEqual( hit.target.description, "cyclic nucleotide-gated olfactory channel [Elephas maximus indicus]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 3219.0) self.assertAlmostEqual(hsp.annotations["bit score"], 1245.12) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 635) self.assertEqual(hsp.annotations["positive"], 654) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 664], [ 0, 664]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 664)) self.assertEqual( repr(hsp.query.seq), "Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')", ) self.assertEqual(hsp.query.id, "Query_74414") self.assertEqual(hsp.query.description, "unnamed protein product") self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTEKSNGVKSSPANNHNHHVPSTIKANGKDDRRTSSRPQSAADDDTSSELQRLA...EKP')", ) self.assertEqual(hsp.target.id, "ref|XP_049729369.1|") self.assertEqual(hsp.target.name, "XP_049729369") self.assertEqual( hsp.target.description, "cyclic nucleotide-gated olfactory channel [Elephas maximus indicus]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MTEKSNGVKSSPANNHN+HVP+TIKANGKD+ RT SRPQSAADDDTSSELQRLAEMDAPQQ RGGFRRI+RLVGVIR+WAN+NFREE+ RPDSFLERFRGPELQTVTTQQGDGK DKDG+GKGTKKKFELFVLDPAGDWYYRWLF IA+PVLYNWCLLVARACFSDLQ+GY+LVWLVLDYFSD+VYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHT+QFKLDVASIIPTDLIYFAVGIH+PELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDP YGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKKTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAA+MEVDVQEKL+QLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLE KMKQN E+DYLSDG+NSPEPAA E+P", ) self.assertEqual( str(hsp), """\ Query : Query_74414 Length: 664 Strand: Plus unnamed protein product Target: ref|XP_049729369.1| Length: 664 Strand: Plus cyclic nucleotide-gated olfactory channel [Elephas maximus indicus] Score:1245 bits(3219), Expect:0, Identities:635/664(96%), Positives:654/664(98%), Gaps:0.664(0%) ref|XP_04 0 MTEKSNGVKSSPANNHNHHVPSTIKANGKDDRRTSSRPQSAADDDTSSELQRLAEMDAPQ 0 |||||||||||||||||.|||.||||||||..||.||||||||||||||||||||||||| Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ ref|XP_04 60 QWRGGFRRIIRLVGVIREWANKNFREEDPRPDSFLERFRGPELQTVTTQQGDGKSDKDGE 60 |.|||||||.|||||||.|||.|||||..|||||||||||||||||||||||||.||||. Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD ref|XP_04 120 GKGTKKKFELFVLDPAGDWYYRWLFFIALPVLYNWCLLVARACFSDLQKGYYLVWLVLDY 120 |||||||||||||||||||||||||.||.|||||||||||||||||||.||.|||||||| Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY ref|XP_04 180 FSDMVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTMQFKLDVASIIPTDLIYFAVGI 180 |||.||||||||||||||||||||||||||||||||||.||||||||||||||||||||| Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI ref|XP_04 240 HSPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI 240 |.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI ref|XP_04 300 GFGVDTWVYPNITDPAYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG 300 |||||||||||||||.|||||||||||||||||||||||||||||||||||||||||||| Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG ref|XP_04 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK ref|XP_04 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY 420 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY ref|XP_04 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI 480 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI ref|XP_04 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAATMEVDVQEK 540 |||||||||||||||||||||||||||||||||||||||||||||||||||.|||||||| Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK ref|XP_04 600 LEQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLETKMKQNNEEDYLSDGINSPEPAA 600 |.|||||||||||||||||||||||||||||||||||.|||||.|.||||||.||||||| Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA ref|XP_04 660 VEKP 664 660 .|.| 664 Query_744 660 AEQP 664 """, ) def test_phiblast_writer(self): """Writing BLASTP 2.14.1+ (phiblast.xml).""" filename = "phiblast.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_phiblast_records(written_records) def test_xml_21500_blastp_001_parser(self): """Parsing BLASTP 2.15.0+ (xml_21500_blastp_001.xml).""" filename = "xml_21500_blastp_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_blastp_001_records(records) with Blast.parse(path) as records: self.check_xml_21500_blastp_001_records(records) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_blastp_001_record(record) record = Blast.read(path) self.check_xml_21500_blastp_001_record(record) with Blast.parse(path) as records: self.assertEqual( str(records), """\ Program: BLASTP 2.15.0+ db: nr Query: WXX52402.1 (length=239) RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 1 ref|WP_003221446.1| MULTISPECIES: RNA polymerase sporu... 1 1 dbj|BAI85158.2| sporulation sigma factor SigE [Bacillu... 2 1 ref|WP_120028072.1| RNA polymerase sporulation sigma f... 3 1 ref|WP_326121348.1| RNA polymerase sporulation sigma f... 4 1 ref|WP_128473893.1| RNA polymerase sporulation sigma f... 5 1 ref|WP_174228079.1| RNA polymerase sporulation sigma f... 6 1 ref|WP_315947263.1| RNA polymerase sporulation sigma f... 7 1 ref|WP_219912761.1| RNA polymerase sporulation sigma f... 8 1 ref|WP_038828182.1| RNA polymerase sporulation sigma f... 9 1 ref|WP_326211018.1| RNA polymerase sporulation sigma f...""", ) record = Blast.read(path) self.assertEqual( str(record), """\ Program: BLASTP 2.15.0+ db: nr Query: WXX52402.1 (length=239) RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 1 ref|WP_003221446.1| MULTISPECIES: RNA polymerase sporu... 1 1 dbj|BAI85158.2| sporulation sigma factor SigE [Bacillu... 2 1 ref|WP_120028072.1| RNA polymerase sporulation sigma f... 3 1 ref|WP_326121348.1| RNA polymerase sporulation sigma f... 4 1 ref|WP_128473893.1| RNA polymerase sporulation sigma f... 5 1 ref|WP_174228079.1| RNA polymerase sporulation sigma f... 6 1 ref|WP_315947263.1| RNA polymerase sporulation sigma f... 7 1 ref|WP_219912761.1| RNA polymerase sporulation sigma f... 8 1 ref|WP_038828182.1| RNA polymerase sporulation sigma f... 9 1 ref|WP_326211018.1| RNA polymerase sporulation sigma f...""", ) def test_xml2_21500_blastp_001_parser(self): """Parsing BLASTP 2.15.0+ (xml2_21500_blastp_001.xml).""" filename = "xml2_21500_blastp_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_blastp_001_records(records, xml2=True) with Blast.parse(path) as records: self.check_xml_21500_blastp_001_records(records, xml2=True) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_blastp_001_record(record, xml2=True) record = Blast.read(path) self.check_xml_21500_blastp_001_record(record, xml2=True) def check_xml_21500_blastp_001_records(self, records, xml2=False): self.assertEqual(records.program, "blastp") self.assertEqual(records.version, "BLASTP 2.15.0+") self.assertEqual( records.reference, 'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.', ) self.assertEqual(records.db, "nr") if not xml2: self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "WXX52402.1") self.assertEqual( records.query.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(repr(records.query.seq), "Seq(None, length=239)") self.assertEqual(records.param["matrix"], "BLOSUM62") self.assertAlmostEqual(records.param["expect"], 0.05) self.assertEqual(records.param["gap-open"], 11) self.assertEqual(records.param["gap-extend"], 1) self.assertEqual(records.param["filter"], "F") if xml2: self.assertEqual(records.param["cbs"], 2) self.assertEqual(len(records.param), 6) else: self.assertEqual(len(records.param), 5) record = next(records) self.assertRaises(StopIteration, next, records) self.check_xml_21500_blastp_001_record(record, xml2=xml2) def check_xml_21500_blastp_001_record(self, record, xml2=False): hit = record[0] self.assertEqual(hit.num, 1) target = hit.target self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "ref|WP_003221446.1|") self.assertEqual(target.name, "WP_003221446") seq = target.seq self.assertEqual(repr(seq), "Seq(None, length=239)") if xml2: self.assertEqual( target.description, "MULTISPECIES: RNA polymerase sporulation sigma factor SigE [Bacillales]", ) self.assertEqual(target.annotations["taxid"], 1385) self.assertEqual(target.annotations["sciname"], "Bacillales") self.assertIs(target, hit.targets[0]) self.assertEqual(len(hit.targets), 8) target = hit.targets[1] self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "ref|NP_389415.2|") self.assertEqual(target.name, "NP_389415") self.assertIs(target.seq, seq) self.assertEqual( target.description, "RNA polymerase sporulation-specific sigma-29 factor (sigma-E) [Bacillus subtilis subsp. subtilis str. 168]", ) self.assertEqual(target.annotations["taxid"], 224308) self.assertEqual( target.annotations["sciname"], "Bacillus subtilis subsp. subtilis str. 168", ) target = hit.targets[2] self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "sp|P06222.1|") self.assertEqual(target.name, "P06222") self.assertIs(target.seq, seq) self.assertEqual( target.description, "RecName: Full=RNA polymerase sigma-E factor; AltName: Full=P31; AltName: Full=Sigma-29; AltName: Full=Stage II sporulation protein GB; Flags: Precursor [Bacillus subtilis subsp. subtilis str. 168]", ) self.assertEqual(target.annotations["taxid"], 224308) self.assertEqual( target.annotations["sciname"], "Bacillus subtilis subsp. subtilis str. 168", ) target = hit.targets[3] self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "gb|KFI04694.1|") self.assertEqual(target.name, "KFI04694") self.assertIs(target.seq, seq) self.assertEqual( target.description, "sporulation sigma factor SigE [Bacillus sp. BSC154]", ) self.assertEqual(target.annotations["taxid"], 1549811) self.assertEqual(target.annotations["sciname"], "Bacillus sp. BSC154") target = hit.targets[4] self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "gb|MDZ5720185.1|") self.assertEqual(target.name, "MDZ5720185") self.assertIs(target.seq, seq) self.assertEqual( target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus sp. X(2023)]", ) self.assertEqual(target.annotations["taxid"], 3106047) self.assertEqual(target.annotations["sciname"], "Bacillus sp. X(2023)") target = hit.targets[5] self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "gb|POO83984.1|") self.assertEqual(target.name, "POO83984") self.assertIs(target.seq, seq) self.assertEqual( target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus sp. MBGLi97]", ) self.assertEqual(target.annotations["taxid"], 2070760) self.assertEqual(target.annotations["sciname"], "Bacillus sp. MBGLi97") target = hit.targets[6] self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "dbj|BAM52179.1|") self.assertEqual(target.name, "BAM52179") self.assertIs(target.seq, seq) self.assertEqual( target.description, "sporulation sigma factor SigE [Bacillus subtilis BEST7613]", ) self.assertEqual(target.annotations["taxid"], 1204343) self.assertEqual( target.annotations["sciname"], "Bacillus subtilis BEST7613" ) target = hit.targets[7] self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "gb|ADM37626.1|") self.assertEqual(target.name, "ADM37626") self.assertIs(target.seq, seq) self.assertEqual( target.description, "sporulation-specific sigma factor sigma-E [Bacillus spizizenii str. W23]", ) self.assertEqual(target.annotations["taxid"], 655816) self.assertEqual( target.annotations["sciname"], "Bacillus spizizenii str. W23" ) else: self.assertEqual( target.description, "MULTISPECIES: RNA polymerase sporulation sigma factor SigE [Bacillales] >ref|NP_389415.2| RNA polymerase sporulation-specific sigma-29 factor (sigma-E) [Bacillus subtilis subsp. subtilis str. 168] >sp|P06222.1| RecName: Full=RNA polymerase sigma-E factor; AltName: Full=P31; AltName: Full=Sigma-29; AltName: Full=Stage II sporulation protein GB; Flags: Precursor [Bacillus subtilis subsp. subtilis str. 168] >gb|KFI04694.1| sporulation sigma factor SigE [Bacillus sp. BSC154] >gb|MDZ5720185.1| RNA polymerase sporulation sigma factor SigE [Bacillus sp. X(2023)] >gb|POO83984.1| RNA polymerase sporulation sigma factor SigE [Bacillus sp. MBGLi97] >dbj|BAM52179.1| sporulation sigma factor SigE [Bacillus subtilis BEST7613] >gb|ADM37626.1| sporulation-specific sigma factor sigma-E [Bacillus spizizenii str. W23]", ) self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1227.0) self.assertAlmostEqual(hsp.annotations["bit score"], 477.248) self.assertAlmostEqual(hsp.annotations["evalue"], 2.44722e-169, places=174) self.assertEqual(hsp.annotations["identity"], 239) self.assertEqual(hsp.annotations["positive"], 239) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[0, 239], [0, 239]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 239)) self.assertEqual( repr(hsp.query.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.query.id, "WXX52402.1") self.assertEqual( hsp.query.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.target.id, hit.target.id) self.assertEqual(hsp.target.name, hit.target.name) self.assertEqual(hsp.target.description, hit.target.description) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV", ) self.assertEqual( repr(hsp), "", ) if xml2: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_003221446.1| Length: 239 Strand: Plus MULTISPECIES: RNA polymerase sporulation sigma factor SigE [Bacillales] Score:477 bits(1227), Expect:2e-169, Identities:239/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_00 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_00 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_00 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_00 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_00 239 239 WXX52402. 239 """, ) else: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_003221446.1| Length: 239 Strand: Plus MULTISPECIES: RNA polymerase sporulation sigma factor SigE [Bacillales] >ref|NP_389415.2| RNA polymerase sporulation-specific sigma-29 factor (sigma-E) [Bacillus subtilis subsp. subtilis str. 168] >sp|P06222.1| RecName: Full=RNA polymerase sigma-E factor; AltName: Full=P31; AltName: Full=Sigma-29; AltName: Full=Stage II sporulation protein GB; Flags: Precursor [Bacillus subtilis subsp. subtilis str. 168] >gb|KFI04694.1| sporulation sigma factor SigE [Bacillus sp. BSC154] >gb|MDZ5720185.1| RNA polymerase sporulation sigma factor SigE [Bacillus sp. X(2023)] >gb|POO83984.1| RNA polymerase sporulation sigma factor SigE [Bacillus sp. MBGLi97] >dbj|BAM52179.1| sporulation sigma factor SigE [Bacillus subtilis BEST7613] >gb|ADM37626.1| sporulation-specific sigma factor sigma-E [Bacillus spizizenii str. W23] Score:477 bits(1227), Expect:2e-169, Identities:239/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_00 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_00 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_00 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_00 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_00 239 239 WXX52402. 239 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "dbj|BAI85158.2|") self.assertEqual(hit.target.name, "BAI85158") self.assertEqual( hit.target.description, "sporulation sigma factor SigE [Bacillus subtilis subsp. natto BEST195]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=260)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1227.0) self.assertAlmostEqual(hsp.annotations["bit score"], 477.248) self.assertAlmostEqual(hsp.annotations["evalue"], 3.91488e-169, places=174) self.assertEqual(hsp.annotations["identity"], 239) self.assertEqual(hsp.annotations["positive"], 239) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[21, 260], [ 0, 239]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 239)) self.assertEqual( repr(hsp.query.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.query.id, "WXX52402.1") self.assertEqual( hsp.query.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({21: 'MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV'}, length=260)", ) self.assertEqual(hsp.target.id, "dbj|BAI85158.2|") self.assertEqual(hsp.target.name, "BAI85158") self.assertEqual( hsp.target.description, "sporulation sigma factor SigE [Bacillus subtilis subsp. natto BEST195]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: dbj|BAI85158.2| Length: 260 Strand: Plus sporulation sigma factor SigE [Bacillus subtilis subsp. natto BEST195] Score:477 bits(1227), Expect:4e-169, Identities:239/239(100%), Positives:239/239(100%), Gaps:0.239(0%) dbj|BAI85 21 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA dbj|BAI85 81 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN dbj|BAI85 141 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA dbj|BAI85 201 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV dbj|BAI85 260 239 WXX52402. 239 """, ) hit = record[2] self.assertEqual(hit.num, 3) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|WP_120028072.1|") self.assertEqual(hit.target.name, "WP_120028072") if xml2: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) else: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >gb|RJS52520.1| RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1225.0) self.assertAlmostEqual(hsp.annotations["bit score"], 476.478) self.assertAlmostEqual(hsp.annotations["evalue"], 4.00949e-169, places=174) self.assertEqual(hsp.annotations["identity"], 238) self.assertEqual(hsp.annotations["positive"], 239) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[0, 239], [0, 239]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 239)) self.assertEqual( repr(hsp.query.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.query.id, "WXX52402.1") self.assertEqual( hsp.query.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.target.id, hit.target.id) self.assertEqual(hsp.target.name, hit.target.name) self.assertEqual(hsp.target.description, hit.target.description) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGL+GEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV", ) self.assertEqual( repr(hsp), "", ) if xml2: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_120028072.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Score:476 bits(1225), Expect:4e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_12 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_12 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_12 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_12 180 LEQLNEREKQIMELRFGLIGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_12 239 239 WXX52402. 239 """, ) else: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_120028072.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >gb|RJS52520.1| RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Score:476 bits(1225), Expect:4e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_12 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_12 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_12 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_12 180 LEQLNEREKQIMELRFGLIGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_12 239 239 WXX52402. 239 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|WP_326121348.1|") self.assertEqual(hit.target.name, "WP_326121348") if xml2: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) else: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >gb|MEC0320584.1| RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1224.0) self.assertAlmostEqual(hsp.annotations["bit score"], 476.093) self.assertAlmostEqual(hsp.annotations["evalue"], 5.63375e-169, places=174) self.assertEqual(hsp.annotations["identity"], 238) self.assertEqual(hsp.annotations["positive"], 239) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[0, 239], [0, 239]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 239)) self.assertEqual( repr(hsp.query.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.query.id, "WXX52402.1") self.assertEqual( hsp.query.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLIMKLPNG...KMV')", ) self.assertEqual(hsp.target.id, hit.target.id) self.assertEqual(hsp.target.name, hit.target.name) self.assertEqual(hsp.target.description, hit.target.description) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVL+MKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV", ) self.assertEqual( repr(hsp), "", ) if xml2: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_326121348.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Score:476 bits(1224), Expect:6e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_32 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLIMKLPNGDQAARA 0 |||||||||||||||||||||||||||||||||||||||||||||||.|||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_32 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_32 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_32 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_32 239 239 WXX52402. 239 """, ) else: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_326121348.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >gb|MEC0320584.1| RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Score:476 bits(1224), Expect:6e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_32 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLIMKLPNGDQAARA 0 |||||||||||||||||||||||||||||||||||||||||||||||.|||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_32 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_32 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_32 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_32 239 239 WXX52402. 239 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|WP_128473893.1|") self.assertEqual(hit.target.name, "WP_128473893") if xml2: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) else: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >gb|QAR61557.1| RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1224.0) self.assertAlmostEqual(hsp.annotations["bit score"], 476.093) self.assertAlmostEqual(hsp.annotations["evalue"], 5.6959e-169, places=173) self.assertEqual(hsp.annotations["identity"], 238) self.assertEqual(hsp.annotations["positive"], 239) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[0, 239], [0, 239]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 239)) self.assertEqual( repr(hsp.query.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.query.id, "WXX52402.1") self.assertEqual( hsp.query.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MKKLKLRLTHLWYKLLMKLGMKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.target.id, hit.target.id) self.assertEqual(hsp.target.name, hit.target.name) self.assertEqual(hsp.target.description, hsp.target.description) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKKLKLRLTHLWYKLLMKLG+KSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV", ) self.assertEqual( repr(hsp), "", ) if xml2: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_128473893.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Score:476 bits(1224), Expect:6e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_12 0 MKKLKLRLTHLWYKLLMKLGMKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 ||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_12 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_12 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_12 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_12 239 239 WXX52402. 239 """, ) else: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_128473893.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >gb|QAR61557.1| RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Score:476 bits(1224), Expect:6e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_12 0 MKKLKLRLTHLWYKLLMKLGMKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 ||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_12 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_12 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_12 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_12 239 239 WXX52402. 239 """, ) hit = record[5] self.assertEqual(hit.num, 6) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|WP_174228079.1|") self.assertEqual(hit.target.name, "WP_174228079") if xml2: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus tequilensis]", ) else: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus tequilensis] >gb|NTU26438.1| RNA polymerase sporulation sigma factor SigE [Bacillus tequilensis]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1224.0) self.assertAlmostEqual(hsp.annotations["bit score"], 476.093) self.assertAlmostEqual(hsp.annotations["evalue"], 7.17171e-169, places=174) self.assertEqual(hsp.annotations["identity"], 238) self.assertEqual(hsp.annotations["positive"], 239) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[0, 239], [0, 239]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 239)) self.assertEqual( repr(hsp.query.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.query.id, "WXX52402.1") self.assertEqual( hsp.query.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MKKLKLRLTHLWYRLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.target.id, hit.target.id) self.assertEqual(hsp.target.name, hit.target.name) self.assertEqual(hsp.target.description, hit.target.description) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKKLKLRLTHLWY+LLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV", ) self.assertEqual( repr(hsp), "", ) if xml2: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_174228079.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus tequilensis] Score:476 bits(1224), Expect:7e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_17 0 MKKLKLRLTHLWYRLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||.|||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_17 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_17 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_17 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_17 239 239 WXX52402. 239 """, ) else: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_174228079.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus tequilensis] >gb|NTU26438.1| RNA polymerase sporulation sigma factor SigE [Bacillus tequilensis] Score:476 bits(1224), Expect:7e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_17 0 MKKLKLRLTHLWYRLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||.|||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_17 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_17 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_17 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_17 239 239 WXX52402. 239 """, ) hit = record[6] self.assertEqual(hit.num, 7) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|WP_315947263.1|") self.assertEqual(hit.target.name, "WP_315947263") if xml2: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus cabrialesii]", ) else: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus cabrialesii] >gb|MDU0153406.1| RNA polymerase sporulation sigma factor SigE [Bacillus cabrialesii]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1224.0) self.assertAlmostEqual(hsp.annotations["bit score"], 476.093) self.assertAlmostEqual(hsp.annotations["evalue"], 7.17171e-169, places=174) self.assertEqual(hsp.annotations["identity"], 238) self.assertEqual(hsp.annotations["positive"], 239) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[0, 239], [0, 239]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 239)) self.assertEqual( repr(hsp.query.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.query.id, "WXX52402.1") self.assertEqual( hsp.query.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MKKLKLRLTHLWYKLLMRLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.target.id, hit.target.id) self.assertEqual(hsp.target.name, hit.target.name) self.assertEqual(hsp.target.description, hit.target.description) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKKLKLRLTHLWYKLLM+LGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV", ) self.assertEqual( repr(hsp), "", ) if xml2: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_315947263.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus cabrialesii] Score:476 bits(1224), Expect:7e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_31 0 MKKLKLRLTHLWYKLLMRLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||||||.|||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_31 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_31 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_31 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_31 239 239 WXX52402. 239 """, ) else: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_315947263.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus cabrialesii] >gb|MDU0153406.1| RNA polymerase sporulation sigma factor SigE [Bacillus cabrialesii] Score:476 bits(1224), Expect:7e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_31 0 MKKLKLRLTHLWYKLLMRLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||||||.|||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_31 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_31 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_31 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_31 239 239 WXX52402. 239 """, ) hit = record[7] self.assertEqual(hit.num, 8) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|WP_219912761.1|") self.assertEqual(hit.target.name, "WP_219912761") if xml2: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) else: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >emb|COM82207.1| RNA polymerase sigma factor RpoD [Bacillus subtilis]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1223.0) self.assertAlmostEqual(hsp.annotations["bit score"], 475.707) self.assertAlmostEqual(hsp.annotations["evalue"], 7.49345e-169, places=174) self.assertEqual(hsp.annotations["identity"], 238) self.assertEqual(hsp.annotations["positive"], 239) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[0, 239], [0, 239]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 239)) self.assertEqual( repr(hsp.query.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.query.id, "WXX52402.1") self.assertEqual( hsp.query.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.target.id, hit.target.id) self.assertEqual(hsp.target.name, hit.target.name) self.assertEqual(hsp.target.description, hit.target.description) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATY+SRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV", ) self.assertEqual( repr(hsp), "", ) if xml2: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_219912761.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Score:475 bits(1223), Expect:7e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_21 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_21 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYSSRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||.|||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_21 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_21 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_21 239 239 WXX52402. 239 """, ) else: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_219912761.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >emb|COM82207.1| RNA polymerase sigma factor RpoD [Bacillus subtilis] Score:475 bits(1223), Expect:7e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_21 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_21 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYSSRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||.|||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_21 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_21 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_21 239 239 WXX52402. 239 """, ) hit = record[8] self.assertEqual(hit.num, 9) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|WP_038828182.1|") self.assertEqual(hit.target.name, "WP_038828182") self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1223.0) self.assertAlmostEqual(hsp.annotations["bit score"], 475.707) self.assertAlmostEqual(hsp.annotations["evalue"], 7.49345e-169, places=174) self.assertEqual(hsp.annotations["identity"], 238) self.assertEqual(hsp.annotations["positive"], 239) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[0, 239], [0, 239]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 239)) self.assertEqual( repr(hsp.query.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.query.id, "WXX52402.1") self.assertEqual( hsp.query.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.target.id, "ref|WP_038828182.1|") self.assertEqual(hsp.target.name, "WP_038828182") self.assertEqual( hsp.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAAR+ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_038828182.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Score:475 bits(1223), Expect:7e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_03 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARS 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||. WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_03 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_03 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_03 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_03 239 239 WXX52402. 239 """, ) hit = record[9] self.assertEqual(hit.num, 10) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|WP_326211018.1|") self.assertEqual(hit.target.name, "WP_326211018") if xml2: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) else: self.assertEqual( hit.target.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >gb|MEC1541690.1| RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1223.0) self.assertAlmostEqual(hsp.annotations["bit score"], 475.707) self.assertAlmostEqual(hsp.annotations["evalue"], 8.36237e-169, places=174) self.assertEqual(hsp.annotations["identity"], 238) self.assertEqual(hsp.annotations["positive"], 239) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[0, 239], [0, 239]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 239)) self.assertEqual( repr(hsp.query.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')", ) self.assertEqual(hsp.query.id, "WXX52402.1") self.assertEqual( hsp.query.description, "RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMM')", ) self.assertEqual(hsp.target.id, hit.target.id) self.assertEqual(hsp.target.name, hit.target.name) self.assertEqual(hsp.target.description, hit.target.description) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKM+", ) self.assertEqual( repr(hsp), "", ) if xml2: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_326211018.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Score:475 bits(1223), Expect:8e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_32 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_32 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_32 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_32 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMM 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||. WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_32 239 239 WXX52402. 239 """, ) else: self.assertEqual( str(hsp), """\ Query : WXX52402.1 Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Target: ref|WP_326211018.1| Length: 239 Strand: Plus RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >gb|MEC1541690.1| RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] Score:475 bits(1223), Expect:8e-169, Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%) ref|WP_32 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA ref|WP_32 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN ref|WP_32 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA ref|WP_32 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMM 180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||. WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV ref|WP_32 239 239 WXX52402. 239 """, ) with self.assertRaises(IndexError) as cm: record[12] self.assertEqual(str(cm.exception), "index out of range") with self.assertRaises(TypeError) as cm: record[None] self.assertEqual(str(cm.exception), "key must be an integer, slice, or str") with self.assertRaises(KeyError) as cm: record["weird_key"] self.assertEqual(str(cm.exception), "'weird_key'") target_id = "ref|WP_326121348.1|" self.assertIn(target_id, record) self.assertNotIn("weird_id", record) self.assertEqual(record[target_id].target.id, target_id) self.assertEqual(record.index(target_id), 3) with self.assertRaises(ValueError) as cm: record.index("weird_id") self.assertEqual(str(cm.exception), "'weird_id' not found") self.assertEqual( repr(hit), "", ) self.assertEqual( repr(hit[:0]), "" ) self.assertEqual( record.keys(), [ "ref|WP_003221446.1|", "dbj|BAI85158.2|", "ref|WP_120028072.1|", "ref|WP_326121348.1|", "ref|WP_128473893.1|", "ref|WP_174228079.1|", "ref|WP_315947263.1|", "ref|WP_219912761.1|", "ref|WP_038828182.1|", "ref|WP_326211018.1|", ], ) self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 718658123) self.assertEqual(record.stat["db-len"], 277418989154) if xml2: self.assertEqual(record.stat["hsp-len"], 161) self.assertEqual(record.stat["eff-space"], 12613772445378) else: self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 0) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) def test_xml_21500_blastp_001_writer(self): """Writing BLASTP 2.15.0+ (xml_21500_blastp_001.xml).""" filename = "xml_21500_blastp_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_blastp_001_records(written_records) def test_xml2_21500_blastp_001_writer(self): """Writing BLASTP 2.15.0+ (xml2_21500_blastp_001.xml).""" filename = "xml2_21500_blastp_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream, fmt="XML2") self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_blastp_001_records(written_records, xml2=True) class TestBlastn(unittest.TestCase): """Test the Blast XML parser for blastn output.""" def test_xml_21500_blastn_001_parser(self): """Parsing BLASTN 2.15.0+ (xml_21500_blastn_001.xml).""" filename = "xml_21500_blastn_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_blastn_001_records(records) with Blast.parse(path) as records: self.check_xml_21500_blastn_001_records(records) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_blastn_001_record(record) record = Blast.read(path) self.check_xml_21500_blastn_001_record(record) with Blast.parse(path) as records: self.assertEqual( str(records), """\ Program: BLASTN 2.15.0+ db: genomic/10090/GCF_000001635.26 Query: Query_78041 (length=285) G26684.1 human STS STS_D11570, sequence tagged site Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 1 gi|372099107|ref|NC_000069.6| Mus musculus strain C57B... 1 1 gi|372099103|ref|NC_000073.6| Mus musculus strain C57B... 2 2 gi|372099106|ref|NC_000070.6| Mus musculus strain C57B... 3 2 gi|372099108|ref|NC_000068.7| Mus musculus strain C57B... 4 2 gi|372099097|ref|NC_000079.6| Mus musculus strain C57B... 5 2 gi|372099098|ref|NC_000078.6| Mus musculus strain C57B... 6 1 gi|372099049|ref|NT_187008.1| Mus musculus strain 129S... 7 1 gi|372099109|ref|NC_000067.6| Mus musculus strain C57B... 8 1 gi|372099101|ref|NC_000075.6| Mus musculus strain C57B... 9 1 gi|372099100|ref|NC_000076.6| Mus musculus strain C57B... 10 1 gi|372099094|ref|NC_000082.6| Mus musculus strain C57B...""", ) def test_xml2_21500_blastn_001_parser(self): """Parsing BLASTN 2.15.0+ (xml2_21500_blastn_001.xml).""" filename = "xml2_21500_blastn_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_blastn_001_records(records, xml2=True) with Blast.parse(path) as records: self.check_xml_21500_blastn_001_records(records, xml2=True) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_blastn_001_record(record, xml2=True) record = Blast.read(path) self.check_xml_21500_blastn_001_record(record, xml2=True) def check_xml_21500_blastn_001_records(self, records, xml2=False): self.assertEqual(records.program, "blastn") self.assertEqual(records.version, "BLASTN 2.15.0+") self.assertEqual( records.reference, 'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.', ) self.assertEqual(records.db, "genomic/10090/GCF_000001635.26") if not xml2: self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "Query_78041") self.assertEqual( records.query.description, "G26684.1 human STS STS_D11570, sequence tagged site", ) self.assertEqual(repr(records.query.seq), "Seq(None, length=285)") self.assertEqual(len(records.param), 6) self.assertAlmostEqual(records.param["expect"], 10.0) self.assertEqual(records.param["sc-match"], 2) self.assertEqual(records.param["sc-mismatch"], -3) self.assertEqual(records.param["gap-open"], 5) self.assertEqual(records.param["gap-extend"], 2) self.assertEqual(records.param["filter"], "L;m;") record = next(records) self.assertRaises(StopIteration, next, records) self.check_xml_21500_blastn_001_record(record, xml2=xml2) def check_xml_21500_blastn_001_record(self, record, xml2=False): if not xml2: self.assertEqual(record.num, 1) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "Query_78041") self.assertEqual( record.query.description, "G26684.1 human STS STS_D11570, sequence tagged site", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=285)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 239) self.assertEqual(record.stat["db-len"], 2818974565) if xml2: self.assertEqual(record.stat["hsp-len"], 31) self.assertEqual(record.stat["eff-space"], 716017657624) else: self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 0) self.assertAlmostEqual(record.stat["kappa"], 0.41) self.assertAlmostEqual(record.stat["lambda"], 0.625) self.assertAlmostEqual(record.stat["entropy"], 0.78) self.assertEqual(len(record), 11) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|372099107|ref|NC_000069.6|") self.assertEqual(hit.target.name, "NC_000069") self.assertEqual( hit.target.description, "Mus musculus strain C57BL/6J chromosome 3, GRCm38.p6 C57BL/6J", ) if xml2: self.assertEqual(hit.target.annotations["taxid"], 10090) self.assertEqual(hit.target.annotations["sciname"], "Mus musculus") self.assertEqual(repr(hit.target.seq), "Seq(None, length=160039680)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 44.0) self.assertAlmostEqual(hsp.annotations["bit score"], 40.9604) self.assertAlmostEqual(hsp.annotations["evalue"], 0.334664) self.assertEqual(hsp.annotations["identity"], 30) if not xml2: self.assertEqual(hsp.annotations["positive"], 30) self.assertEqual(hsp.annotations["gaps"], 1) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[101449177, 101449150, 101449149, 101449143], [ 133, 160, 160, 166]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 34)) self.assertEqual( repr(hsp.query.seq), "Seq({133: 'GAATCCTAGAGGCTTGATTGGCCCAGGCTGCTG'}, length=285)", ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({101449143: 'CAGCAGGCCAGGGCCAGTCCAGCCTCTAGGATTC'}, length=160039680)", ) self.assertEqual(hsp.target.id, "gi|372099107|ref|NC_000069.6|") self.assertEqual(hsp.target.name, "NC_000069") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 3, GRCm38.p6 C57BL/6J", ) if xml2: self.assertEqual(hit.target.annotations["taxid"], 10090) self.assertEqual(hit.target.annotations["sciname"], "Mus musculus") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "|||||||||||||| || |||||| || ||||||" ) if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099107|ref|NC_000069.6| Length: 160039680 Strand: Minus Mus musculus strain C57BL/6J chromosome 3, GRCm38.p6 C57BL/6J Score:40 bits(44), Expect:0.3, Identities:30/34(88%), Gaps:1.34(3%) gi|372099 101449177 GAATCCTAGAGGCTGGACTGGCCCTGGCCTGCTG 101449143 0 ||||||||||||||.||.||||||.||-|||||| 34 Query_780 133 GAATCCTAGAGGCTTGATTGGCCCAGG-CTGCTG 166 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099107|ref|NC_000069.6| Length: 160039680 Strand: Minus Mus musculus strain C57BL/6J chromosome 3, GRCm38.p6 C57BL/6J Score:40 bits(44), Expect:0.3, Identities:30/34(88%), Positives:30/34(88%), Gaps:1.34(3%) gi|372099 101449177 GAATCCTAGAGGCTGGACTGGCCCTGGCCTGCTG 101449143 0 ||||||||||||||.||.||||||.||-|||||| 34 Query_780 133 GAATCCTAGAGGCTTGATTGGCCCAGG-CTGCTG 166 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|372099103|ref|NC_000073.6|") self.assertEqual(hit.target.name, "NC_000073") self.assertEqual( hit.target.description, "Mus musculus strain C57BL/6J chromosome 7, GRCm38.p6 C57BL/6J", ) if xml2: self.assertEqual(hit.target.annotations["taxid"], 10090) self.assertEqual(hit.target.annotations["sciname"], "Mus musculus") self.assertEqual(repr(hit.target.seq), "Seq(None, length=145441459)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 44.0) self.assertAlmostEqual(hsp.annotations["bit score"], 40.9604) self.assertAlmostEqual(hsp.annotations["evalue"], 0.334664) self.assertEqual(hsp.annotations["identity"], 26) if not xml2: self.assertEqual(hsp.annotations["positive"], 26) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[131772185, 131772156], [ 204, 233]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 29)) self.assertEqual( repr(hsp.query.seq), "Seq({204: 'GAAAGGAAATNAAAATGGAAAGTTCTTGT'}, length=285)", ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({131772156: 'ACCAGAACTTTCCATTTTTTTTTCCTTTC'}, length=145441459)", ) self.assertEqual(hsp.target.id, "gi|372099103|ref|NC_000073.6|") self.assertEqual(hsp.target.name, "NC_000073") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 7, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "||||||||| ||||||||||||||| ||") if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099103|ref|NC_000073.6| Length: 145441459 Strand: Minus Mus musculus strain C57BL/6J chromosome 7, GRCm38.p6 C57BL/6J Score:40 bits(44), Expect:0.3, Identities:26/29(90%), Gaps:0.29(0%) gi|372099 131772185 GAAAGGAAAAAAAAATGGAAAGTTCTGGT 131772156 0 |||||||||..|||||||||||||||.|| 29 Query_780 204 GAAAGGAAATNAAAATGGAAAGTTCTTGT 233 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099103|ref|NC_000073.6| Length: 145441459 Strand: Minus Mus musculus strain C57BL/6J chromosome 7, GRCm38.p6 C57BL/6J Score:40 bits(44), Expect:0.3, Identities:26/29(90%), Positives:26/29(90%), Gaps:0.29(0%) gi|372099 131772185 GAAAGGAAAAAAAAATGGAAAGTTCTGGT 131772156 0 |||||||||..|||||||||||||||.|| 29 Query_780 204 GAAAGGAAATNAAAATGGAAAGTTCTTGT 233 """, ) hit = record[2] self.assertEqual(hit.num, 3) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|372099106|ref|NC_000070.6|") self.assertEqual(hit.target.name, "NC_000070") self.assertEqual( hit.target.description, "Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J", ) if xml2: self.assertEqual(hit.target.annotations["taxid"], 10090) self.assertEqual(hit.target.annotations["sciname"], "Mus musculus") self.assertEqual(repr(hit.target.seq), "Seq(None, length=156508116)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 43.0) self.assertAlmostEqual(hsp.annotations["bit score"], 40.0587) self.assertAlmostEqual(hsp.annotations["evalue"], 1.16809) self.assertEqual(hsp.annotations["identity"], 23) if not xml2: self.assertEqual(hsp.annotations["positive"], 23) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[9607562, 9607538], [ 61, 85]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 24)) self.assertEqual( repr(hsp.query.seq), "Seq({61: 'CCAACACAGGCCAGCGACTTCTGG'}, length=285)" ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({9607538: 'CCAGAAGCCGCTGGCCTGTGTTGG'}, length=156508116)", ) self.assertEqual(hsp.target.id, "gi|372099106|ref|NC_000070.6|") self.assertEqual(hsp.target.name, "NC_000070") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "|||||||||||||||| |||||||") if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099106|ref|NC_000070.6| Length: 156508116 Strand: Minus Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J Score:40 bits(43), Expect:1, Identities:23/24(96%), Gaps:0.24(0%) gi|372099 9607562 CCAACACAGGCCAGCGGCTTCTGG 9607538 0 ||||||||||||||||.||||||| 24 Query_780 61 CCAACACAGGCCAGCGACTTCTGG 85 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099106|ref|NC_000070.6| Length: 156508116 Strand: Minus Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J Score:40 bits(43), Expect:1, Identities:23/24(96%), Positives:23/24(96%), Gaps:0.24(0%) gi|372099 9607562 CCAACACAGGCCAGCGGCTTCTGG 9607538 0 ||||||||||||||||.||||||| 24 Query_780 61 CCAACACAGGCCAGCGACTTCTGG 85 """, ) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 40.0) self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537) self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705) self.assertEqual(hsp.annotations["identity"], 28) if not xml2: self.assertEqual(hsp.annotations["positive"], 28) self.assertEqual(hsp.annotations["gaps"], 1) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[142902531, 142902542, 142902543, 142902563], [ 241, 252, 252, 272]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 32)) self.assertEqual( repr(hsp.query.seq), "Seq({241: 'GCCTGACATGGGTAGCTGCTCAATAAATGCT'}, length=285)", ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({142902531: 'GCCTGGCATGAAGTAACTGCTCAATAAATGCT'}, length=156508116)", ) self.assertEqual(hsp.target.id, "gi|372099106|ref|NC_000070.6|") self.assertEqual(hsp.target.name, "NC_000070") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "||||| |||| ||| ||||||||||||||||") if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099106|ref|NC_000070.6| Length: 156508116 Strand: Plus Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:28/32(88%), Gaps:1.32(3%) gi|372099 142902531 GCCTGGCATGAAGTAACTGCTCAATAAATGCT 142902563 0 |||||.||||.-|||.|||||||||||||||| 32 Query_780 241 GCCTGACATGG-GTAGCTGCTCAATAAATGCT 272 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099106|ref|NC_000070.6| Length: 156508116 Strand: Plus Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:28/32(88%), Positives:28/32(88%), Gaps:1.32(3%) gi|372099 142902531 GCCTGGCATGAAGTAACTGCTCAATAAATGCT 142902563 0 |||||.||||.-|||.|||||||||||||||| 32 Query_780 241 GCCTGACATGG-GTAGCTGCTCAATAAATGCT 272 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|372099108|ref|NC_000068.7|") self.assertEqual(hit.target.name, "NC_000068") self.assertEqual( hit.target.description, "Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J", ) if xml2: self.assertEqual(hit.target.annotations["taxid"], 10090) self.assertEqual(hit.target.annotations["sciname"], "Mus musculus") self.assertEqual(repr(hit.target.seq), "Seq(None, length=182113224)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 42.0) self.assertAlmostEqual(hsp.annotations["bit score"], 39.157) self.assertAlmostEqual(hsp.annotations["evalue"], 1.16809) self.assertEqual(hsp.annotations["identity"], 27) if not xml2: self.assertEqual(hsp.annotations["positive"], 27) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[3799646, 3799677], [ 238, 269]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 31)) self.assertEqual( repr(hsp.query.seq), "Seq({238: 'AAGGCCTGACATGGGTAGCTGCTCAATAAAT'}, length=285)", ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({3799646: 'AAGTCCTGGCATGAGTAGTTGCTCAATAAAT'}, length=182113224)", ) self.assertEqual(hsp.target.id, "gi|372099108|ref|NC_000068.7|") self.assertEqual(hsp.target.name, "NC_000068") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "||| |||| |||| |||| ||||||||||||") if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099108|ref|NC_000068.7| Length: 182113224 Strand: Plus Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J Score:39 bits(42), Expect:1, Identities:27/31(87%), Gaps:0.31(0%) gi|372099 3799646 AAGTCCTGGCATGAGTAGTTGCTCAATAAAT 3799677 0 |||.||||.||||.||||.|||||||||||| 31 Query_780 238 AAGGCCTGACATGGGTAGCTGCTCAATAAAT 269 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099108|ref|NC_000068.7| Length: 182113224 Strand: Plus Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J Score:39 bits(42), Expect:1, Identities:27/31(87%), Positives:27/31(87%), Gaps:0.31(0%) gi|372099 3799646 AAGTCCTGGCATGAGTAGTTGCTCAATAAAT 3799677 0 |||.||||.||||.||||.|||||||||||| 31 Query_780 238 AAGGCCTGACATGGGTAGCTGCTCAATAAAT 269 """, ) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 41.0) self.assertAlmostEqual(hsp.annotations["bit score"], 38.2554) self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705) self.assertEqual(hsp.annotations["identity"], 23) if not xml2: self.assertEqual(hsp.annotations["positive"], 23) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[70278959, 70278984], [ 210, 235]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 25)) self.assertEqual( repr(hsp.query.seq), "Seq({210: 'AAATNAAAATGGAAAGTTCTTGTAG'}, length=285)" ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({70278959: 'AAATGAAAATGGAAAGTTCTTATAG'}, length=182113224)", ) self.assertEqual(hsp.target.id, "gi|372099108|ref|NC_000068.7|") self.assertEqual(hsp.target.name, "NC_000068") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "|||| |||||||||||||||| |||") if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099108|ref|NC_000068.7| Length: 182113224 Strand: Plus Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J Score:38 bits(41), Expect:4, Identities:23/25(92%), Gaps:0.25(0%) gi|372099 70278959 AAATGAAAATGGAAAGTTCTTATAG 70278984 0 ||||.||||||||||||||||.||| 25 Query_780 210 AAATNAAAATGGAAAGTTCTTGTAG 235 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099108|ref|NC_000068.7| Length: 182113224 Strand: Plus Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J Score:38 bits(41), Expect:4, Identities:23/25(92%), Positives:23/25(92%), Gaps:0.25(0%) gi|372099 70278959 AAATGAAAATGGAAAGTTCTTATAG 70278984 0 ||||.||||||||||||||||.||| 25 Query_780 210 AAATNAAAATGGAAAGTTCTTGTAG 235 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|372099097|ref|NC_000079.6|") self.assertEqual(hit.target.name, "NC_000079") self.assertEqual( hit.target.description, "Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J", ) if xml2: self.assertEqual(hit.target.annotations["taxid"], 10090) self.assertEqual(hit.target.annotations["sciname"], "Mus musculus") self.assertEqual(repr(hit.target.seq), "Seq(None, length=120421639)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 42.0) self.assertAlmostEqual(hsp.annotations["bit score"], 39.157) self.assertAlmostEqual(hsp.annotations["evalue"], 1.16809) self.assertEqual(hsp.annotations["identity"], 25) if not xml2: self.assertEqual(hsp.annotations["positive"], 25) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[26806584, 26806556], [ 206, 234]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 28)) self.assertEqual( repr(hsp.query.seq), "Seq({206: 'AAGGAAATNAAAATGGAAAGTTCTTGTA'}, length=285)", ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({26806556: 'TAGAAGAACTTTCCATTTTGATGTCCTT'}, length=120421639)", ) self.assertEqual(hsp.target.id, "gi|372099097|ref|NC_000079.6|") self.assertEqual(hsp.target.name, "NC_000079") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "||||| || |||||||||||||||| ||") if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099097|ref|NC_000079.6| Length: 120421639 Strand: Minus Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J Score:39 bits(42), Expect:1, Identities:25/28(89%), Gaps:0.28(0%) gi|372099 26806584 AAGGACATCAAAATGGAAAGTTCTTCTA 26806556 0 |||||.||.||||||||||||||||.|| 28 Query_780 206 AAGGAAATNAAAATGGAAAGTTCTTGTA 234 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099097|ref|NC_000079.6| Length: 120421639 Strand: Minus Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J Score:39 bits(42), Expect:1, Identities:25/28(89%), Positives:25/28(89%), Gaps:0.28(0%) gi|372099 26806584 AAGGACATCAAAATGGAAAGTTCTTCTA 26806556 0 |||||.||.||||||||||||||||.|| 28 Query_780 206 AAGGAAATNAAAATGGAAAGTTCTTGTA 234 """, ) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 40.0) self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537) self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705) self.assertEqual(hsp.annotations["identity"], 32) if not xml2: self.assertEqual(hsp.annotations["positive"], 32) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[56840340, 56840300], [ 233, 273]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 40)) self.assertEqual( repr(hsp.query.seq), "Seq({233: 'AGCGCAAGGCCTGACATGGGTAGCTGCTCAATAAATGCTA'}, length=285)", ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({56840300: 'TAGTATTCACTGAACATTTTCCTATGTCAGGCCTTGCGCT'}, length=120421639)", ) self.assertEqual(hsp.target.id, "gi|372099097|ref|NC_000079.6|") self.assertEqual(hsp.target.name, "NC_000079") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "||||||||||||||||| || | || ||| | ||| |||" ) if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099097|ref|NC_000079.6| Length: 120421639 Strand: Minus Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:32/40(80%), Gaps:0.40(0%) gi|372099 56840340 AGCGCAAGGCCTGACATAGGAAAATGTTCAGTGAATACTA 56840300 0 |||||||||||||||||.||.|..||.|||.|.|||.||| 40 Query_780 233 AGCGCAAGGCCTGACATGGGTAGCTGCTCAATAAATGCTA 273 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099097|ref|NC_000079.6| Length: 120421639 Strand: Minus Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:32/40(80%), Positives:32/40(80%), Gaps:0.40(0%) gi|372099 56840340 AGCGCAAGGCCTGACATAGGAAAATGTTCAGTGAATACTA 56840300 0 |||||||||||||||||.||.|..||.|||.|.|||.||| 40 Query_780 233 AGCGCAAGGCCTGACATGGGTAGCTGCTCAATAAATGCTA 273 """, ) hit = record[5] self.assertEqual(hit.num, 6) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|372099098|ref|NC_000078.6|") self.assertEqual(hit.target.name, "NC_000078") self.assertEqual( hit.target.description, "Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J", ) if xml2: self.assertEqual(hit.target.annotations["taxid"], 10090) self.assertEqual(hit.target.annotations["sciname"], "Mus musculus") self.assertEqual(repr(hit.target.seq), "Seq(None, length=120129022)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 41.0) self.assertAlmostEqual(hsp.annotations["bit score"], 38.2554) self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705) self.assertEqual(hsp.annotations["identity"], 22) if not xml2: self.assertEqual(hsp.annotations["positive"], 22) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[113030662, 113030685], [ 48, 71]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 23)) self.assertEqual( repr(hsp.query.seq), "Seq({48: 'CATCCATTCACACCCAACACAGG'}, length=285)" ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({113030662: 'CATCCATTCACACCCAGCACAGG'}, length=120129022)", ) self.assertEqual(hsp.target.id, "gi|372099098|ref|NC_000078.6|") self.assertEqual(hsp.target.name, "NC_000078") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "|||||||||||||||| ||||||") if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099098|ref|NC_000078.6| Length: 120129022 Strand: Plus Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J Score:38 bits(41), Expect:4, Identities:22/23(96%), Gaps:0.23(0%) gi|372099 113030662 CATCCATTCACACCCAGCACAGG 113030685 0 ||||||||||||||||.|||||| 23 Query_780 48 CATCCATTCACACCCAACACAGG 71 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099098|ref|NC_000078.6| Length: 120129022 Strand: Plus Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J Score:38 bits(41), Expect:4, Identities:22/23(96%), Positives:22/23(96%), Gaps:0.23(0%) gi|372099 113030662 CATCCATTCACACCCAGCACAGG 113030685 0 ||||||||||||||||.|||||| 23 Query_780 48 CATCCATTCACACCCAACACAGG 71 """, ) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 40.0) self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537) self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705) self.assertEqual(hsp.annotations["identity"], 28) if not xml2: self.assertEqual(hsp.annotations["positive"], 28) self.assertEqual(hsp.annotations["gaps"], 1) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[108990272, 108990248, 108990248, 108990241], [ 230, 254, 255, 262]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 32)) self.assertEqual( repr(hsp.query.seq), "Seq({230: 'TGTAGCGCAAGGCCTGACATGGGTAGCTGCTC'}, length=285)", ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({108990241: 'GACCAGCACCCATGTCAGGCCTAGAGCTACA'}, length=120129022)", ) self.assertEqual(hsp.target.id, "gi|372099098|ref|NC_000078.6|") self.assertEqual(hsp.target.name, "NC_000078") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "|||||| | ||||||||||||||| |||| ||") if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099098|ref|NC_000078.6| Length: 120129022 Strand: Minus Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:28/32(88%), Gaps:1.32(3%) gi|372099 108990272 TGTAGCTCTAGGCCTGACATGGGT-GCTGGTC 108990241 0 ||||||.|.|||||||||||||||-||||.|| 32 Query_780 230 TGTAGCGCAAGGCCTGACATGGGTAGCTGCTC 262 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099098|ref|NC_000078.6| Length: 120129022 Strand: Minus Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:28/32(88%), Positives:28/32(88%), Gaps:1.32(3%) gi|372099 108990272 TGTAGCTCTAGGCCTGACATGGGT-GCTGGTC 108990241 0 ||||||.|.|||||||||||||||-||||.|| 32 Query_780 230 TGTAGCGCAAGGCCTGACATGGGTAGCTGCTC 262 """, ) hit = record[6] self.assertEqual(hit.num, 7) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|372099049|ref|NT_187008.1|") self.assertEqual(hit.target.name, "NT_187008") self.assertEqual( hit.target.description, "Mus musculus strain 129S1/SvImJ chromosome 16 genomic scaffold, GRCm38.p6 alternate locus group 129S1/SvImJ 129S1/SVIMJ_MMCHR16_CTG2", ) if xml2: self.assertEqual(hit.target.annotations["taxid"], 10090) self.assertEqual(hit.target.annotations["sciname"], "Mus musculus") self.assertEqual(repr(hit.target.seq), "Seq(None, length=250595)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 40.0) self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537) self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705) self.assertEqual(hsp.annotations["identity"], 43) if not xml2: self.assertEqual(hsp.annotations["positive"], 43) self.assertEqual(hsp.annotations["gaps"], 2) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[158458, 158482, 158483, 158491, 158492, 158514], [ 174, 198, 198, 206, 206, 228]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 56)) self.assertEqual( repr(hsp.query.seq), "Seq({174: 'GGAGGCAAAGAATCCCTACCTCCTAGGGGTGAAAGGAAATNAAAATGGAAAGTT'}, length=285)", ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({158458: 'GGAGGCAAAGAATCCCTACATTGTGACAGCTGATAAAGAAGGTAAAATGGAAAATT'}, length=250595)", ) self.assertEqual(hsp.target.id, "gi|372099049|ref|NT_187008.1|") self.assertEqual(hsp.target.name, "NT_187008") self.assertEqual( hsp.target.description, "Mus musculus strain 129S1/SvImJ chromosome 16 genomic scaffold, GRCm38.p6 alternate locus group 129S1/SvImJ 129S1/SVIMJ_MMCHR16_CTG2", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "||||||||||||||||||| | | | | ||| || ||| |||||||||| ||", ) if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099049|ref|NT_187008.1| Length: 250595 Strand: Plus Mus musculus strain 129S1/SvImJ chromosome 16 genomic scaffold, GRCm38.p6 alternate locus group 129S1/SvImJ 129S1/SVIMJ_MMCHR16_CTG2 Score:37 bits(40), Expect:4, Identities:43/56(77%), Gaps:2.56(4%) gi|372099 158458 GGAGGCAAAGAATCCCTACATTGTGACAGCTGATAAAGAAGGTAAAATGGAAAATT 0 |||||||||||||||||||.|..|-|..|.|||-||.|||...||||||||||.|| Query_780 174 GGAGGCAAAGAATCCCTACCTCCT-AGGGGTGA-AAGGAAATNAAAATGGAAAGTT gi|372099 158514 56 Query_780 228 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099049|ref|NT_187008.1| Length: 250595 Strand: Plus Mus musculus strain 129S1/SvImJ chromosome 16 genomic scaffold, GRCm38.p6 alternate locus group 129S1/SvImJ 129S1/SVIMJ_MMCHR16_CTG2 Score:37 bits(40), Expect:4, Identities:43/56(77%), Positives:43/56(77%), Gaps:2.56(4%) gi|372099 158458 GGAGGCAAAGAATCCCTACATTGTGACAGCTGATAAAGAAGGTAAAATGGAAAATT 0 |||||||||||||||||||.|..|-|..|.|||-||.|||...||||||||||.|| Query_780 174 GGAGGCAAAGAATCCCTACCTCCT-AGGGGTGA-AAGGAAATNAAAATGGAAAGTT gi|372099 158514 56 Query_780 228 """, ) hit = record[7] self.assertEqual(hit.num, 8) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|372099109|ref|NC_000067.6|") self.assertEqual(hit.target.name, "NC_000067") self.assertEqual( hit.target.description, "Mus musculus strain C57BL/6J chromosome 1, GRCm38.p6 C57BL/6J", ) if xml2: self.assertEqual(hit.target.annotations["taxid"], 10090) self.assertEqual(hit.target.annotations["sciname"], "Mus musculus") self.assertEqual(repr(hit.target.seq), "Seq(None, length=195471971)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 40.0) self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537) self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705) self.assertEqual(hsp.annotations["identity"], 35) if not xml2: self.assertEqual(hsp.annotations["positive"], 35) self.assertEqual(hsp.annotations["gaps"], 2) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[65190107, 65190128, 65190128, 65190144, 65190144, 65190148], [ 86, 107, 108, 124, 125, 129]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 43)) self.assertEqual( repr(hsp.query.seq), "Seq({86: 'GCTCAGCCACAGACATGGTTTGTNACTNTTGAGCTTCTGTTCC'}, length=285)", ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({65190107: 'GCTCAGCCACATACATGGTTTTAAGTGTTGAGGCTCTTTCC'}, length=195471971)", ) self.assertEqual(hsp.target.id, "gi|372099109|ref|NC_000067.6|") self.assertEqual(hsp.target.name, "NC_000067") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 1, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "||||||||||| ||||||||| | | | ||||| ||| ||||" ) if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099109|ref|NC_000067.6| Length: 195471971 Strand: Plus Mus musculus strain C57BL/6J chromosome 1, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:35/43(81%), Gaps:2.43(5%) gi|372099 65190107 GCTCAGCCACATACATGGTTT-TAAGTGTTGAGGCTCT-TTCC 65190148 0 |||||||||||.|||||||||-|.|.|.|||||..|||-|||| 43 Query_780 86 GCTCAGCCACAGACATGGTTTGTNACTNTTGAGCTTCTGTTCC 129 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099109|ref|NC_000067.6| Length: 195471971 Strand: Plus Mus musculus strain C57BL/6J chromosome 1, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:35/43(81%), Positives:35/43(81%), Gaps:2.43(5%) gi|372099 65190107 GCTCAGCCACATACATGGTTT-TAAGTGTTGAGGCTCT-TTCC 65190148 0 |||||||||||.|||||||||-|.|.|.|||||..|||-|||| 43 Query_780 86 GCTCAGCCACAGACATGGTTTGTNACTNTTGAGCTTCTGTTCC 129 """, ) hit = record[8] self.assertEqual(hit.num, 9) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|372099101|ref|NC_000075.6|") self.assertEqual(hit.target.name, "NC_000075") self.assertEqual( hit.target.description, "Mus musculus strain C57BL/6J chromosome 9, GRCm38.p6 C57BL/6J", ) if xml2: self.assertEqual(hit.target.annotations["taxid"], 10090) self.assertEqual(hit.target.annotations["sciname"], "Mus musculus") self.assertEqual(repr(hit.target.seq), "Seq(None, length=124595110)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 40.0) self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537) self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705) self.assertEqual(hsp.annotations["identity"], 36) if not xml2: self.assertEqual(hsp.annotations["positive"], 36) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[58227241, 58227194], [ 237, 284]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 47)) self.assertEqual( repr(hsp.query.seq), "Seq({237: 'CAAGGCCTGACATGGGTAGCTGCTCAATAAATGCTAGTNTGTTATTT'}, length=285)", ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({58227194: 'AAAAAAAAAAATAGTATTTATTGAGCAGTCATACCTGTCAGGCTTTG'}, length=124595110)", ) self.assertEqual(hsp.target.id, "gi|372099101|ref|NC_000075.6|") self.assertEqual(hsp.target.name, "NC_000075") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 9, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "||| |||||||| | | ||||||||||||| ||| | | || |||", ) if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099101|ref|NC_000075.6| Length: 124595110 Strand: Minus Mus musculus strain C57BL/6J chromosome 9, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:36/47(77%), Gaps:0.47(0%) gi|372099 58227241 CAAAGCCTGACAGGTATGACTGCTCAATAAATACTATTTTTTTTTTT 58227194 0 |||.||||||||.|..|..|||||||||||||.|||.|.|.||.||| 47 Query_780 237 CAAGGCCTGACATGGGTAGCTGCTCAATAAATGCTAGTNTGTTATTT 284 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099101|ref|NC_000075.6| Length: 124595110 Strand: Minus Mus musculus strain C57BL/6J chromosome 9, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:36/47(77%), Positives:36/47(77%), Gaps:0.47(0%) gi|372099 58227241 CAAAGCCTGACAGGTATGACTGCTCAATAAATACTATTTTTTTTTTT 58227194 0 |||.||||||||.|..|..|||||||||||||.|||.|.|.||.||| 47 Query_780 237 CAAGGCCTGACATGGGTAGCTGCTCAATAAATGCTAGTNTGTTATTT 284 """, ) hit = record[9] self.assertEqual(hit.num, 10) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|372099100|ref|NC_000076.6|") self.assertEqual(hit.target.name, "NC_000076") self.assertEqual( hit.target.description, "Mus musculus strain C57BL/6J chromosome 10, GRCm38.p6 C57BL/6J", ) if xml2: self.assertEqual(hit.target.annotations["taxid"], 10090) self.assertEqual(hit.target.annotations["sciname"], "Mus musculus") self.assertEqual(repr(hit.target.seq), "Seq(None, length=130694993)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 40.0) self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537) self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705) self.assertEqual(hsp.annotations["identity"], 20) if not xml2: self.assertEqual(hsp.annotations["positive"], 20) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[119337185, 119337205], [ 254, 274]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 20)) self.assertEqual( repr(hsp.query.seq), "Seq({254: 'AGCTGCTCAATAAATGCTAG'}, length=285)" ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({119337185: 'AGCTGCTCAATAAATGCTAG'}, length=130694993)", ) self.assertEqual(hsp.target.id, "gi|372099100|ref|NC_000076.6|") self.assertEqual(hsp.target.name, "NC_000076") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 10, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "||||||||||||||||||||") if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099100|ref|NC_000076.6| Length: 130694993 Strand: Plus Mus musculus strain C57BL/6J chromosome 10, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:20/20(100%), Gaps:0.20(0%) gi|372099 119337185 AGCTGCTCAATAAATGCTAG 119337205 0 |||||||||||||||||||| 20 Query_780 254 AGCTGCTCAATAAATGCTAG 274 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099100|ref|NC_000076.6| Length: 130694993 Strand: Plus Mus musculus strain C57BL/6J chromosome 10, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:20/20(100%), Positives:20/20(100%), Gaps:0.20(0%) gi|372099 119337185 AGCTGCTCAATAAATGCTAG 119337205 0 |||||||||||||||||||| 20 Query_780 254 AGCTGCTCAATAAATGCTAG 274 """, ) hit = record[10] self.assertEqual(hit.num, 11) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|372099094|ref|NC_000082.6|") self.assertEqual(hit.target.name, "NC_000082") self.assertEqual( hit.target.description, "Mus musculus strain C57BL/6J chromosome 16, GRCm38.p6 C57BL/6J", ) if xml2: self.assertEqual(hit.target.annotations["taxid"], 10090) self.assertEqual(hit.target.annotations["sciname"], "Mus musculus") self.assertEqual(repr(hit.target.seq), "Seq(None, length=98207768)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 40.0) self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537) self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705) self.assertEqual(hsp.annotations["identity"], 43) if not xml2: self.assertEqual(hsp.annotations["positive"], 43) self.assertEqual(hsp.annotations["gaps"], 2) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[18854779, 18854803, 18854804, 18854812, 18854813, 18854835], [ 174, 198, 198, 206, 206, 228]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 56)) self.assertEqual( repr(hsp.query.seq), "Seq({174: 'GGAGGCAAAGAATCCCTACCTCCTAGGGGTGAAAGGAAATNAAAATGGAAAGTT'}, length=285)", ) self.assertEqual(hsp.query.id, "Query_78041") self.assertEqual( hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({18854779: 'GGAGGCAAAGAATCCCTACATTGTGACAGCTGATAAAGAAGGTAAAATGGAAAATT'}, length=98207768)", ) self.assertEqual(hsp.target.id, "gi|372099094|ref|NC_000082.6|") self.assertEqual(hsp.target.name, "NC_000082") self.assertEqual( hsp.target.description, "Mus musculus strain C57BL/6J chromosome 16, GRCm38.p6 C57BL/6J", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "||||||||||||||||||| | | | | ||| || ||| |||||||||| ||", ) if xml2: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099094|ref|NC_000082.6| Length: 98207768 Strand: Plus Mus musculus strain C57BL/6J chromosome 16, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:43/56(77%), Gaps:2.56(4%) gi|372099 18854779 GGAGGCAAAGAATCCCTACATTGTGACAGCTGATAAAGAAGGTAAAATGGAAAATT 0 |||||||||||||||||||.|..|-|..|.|||-||.|||...||||||||||.|| Query_780 174 GGAGGCAAAGAATCCCTACCTCCT-AGGGGTGA-AAGGAAATNAAAATGGAAAGTT gi|372099 18854835 56 Query_780 228 """, ) else: self.assertEqual( str(hsp), """\ Query : Query_78041 Length: 285 Strand: Plus G26684.1 human STS STS_D11570, sequence tagged site Target: gi|372099094|ref|NC_000082.6| Length: 98207768 Strand: Plus Mus musculus strain C57BL/6J chromosome 16, GRCm38.p6 C57BL/6J Score:37 bits(40), Expect:4, Identities:43/56(77%), Positives:43/56(77%), Gaps:2.56(4%) gi|372099 18854779 GGAGGCAAAGAATCCCTACATTGTGACAGCTGATAAAGAAGGTAAAATGGAAAATT 0 |||||||||||||||||||.|..|-|..|.|||-||.|||...||||||||||.|| Query_780 174 GGAGGCAAAGAATCCCTACCTCCT-AGGGGTGA-AAGGAAATNAAAATGGAAAGTT gi|372099 18854835 56 Query_780 228 """, ) def test_xml_21500_blastn_001_writer(self): """Writing BLASTN 2.15.0+ (xml_21500_blastn_001.xml).""" filename = "xml_21500_blastn_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_blastn_001_records(written_records) def test_xml2_21500_blastn_001_writer(self): """Writing BLASTN 2.15.0+ XML2 (xml2_21500_blastn_001.xml).""" filename = "xml2_21500_blastn_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream, fmt="XML2") self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_blastn_001_records(written_records, xml2=True) def test_megablast_legacy(self): """Parsing megablast 2.2.26 [Sep-21-2011] (megablast_legacy.xml).""" filename = "megablast_legacy.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_megablast_legacy_records(records) with Blast.parse(path) as records: self.check_megablast_legacy_records(records) with open(path, "rb") as stream: record = Blast.read(stream) self.check_megablast_legacy_record(record) record = Blast.read(path) self.check_megablast_legacy_record(record) self.assertEqual( str(record[1::2]), """\ Program: megablast 2.2.26 [Sep-21-2011] db: m_cold.fasta Query: lcl|1_ (length=1111) gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence Hits: No hits found""", ) def check_megablast_legacy_records(self, records): self.assertEqual(records.program, "megablast") self.assertEqual(records.version, "megablast 2.2.26 [Sep-21-2011]") self.assertEqual( records.reference, '~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402.', ) self.assertEqual(records.db, "m_cold.fasta") self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "lcl|1_") self.assertEqual( records.query.description, "gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence", ) self.assertEqual(repr(records.query.seq), "Seq(None, length=1111)") self.assertEqual(len(records.param), 6) self.assertAlmostEqual(records.param["expect"], 10.0) self.assertEqual(records.param["sc-match"], 1) self.assertEqual(records.param["sc-mismatch"], -3) self.assertEqual(records.param["gap-open"], 0) self.assertEqual(records.param["gap-extend"], 0) self.assertEqual(records.param["filter"], "D") record = next(records) self.assertRaises(StopIteration, next, records) self.check_megablast_legacy_record(record) self.assertEqual(len(records.mbstat), 7) self.assertEqual(records.mbstat["db-num"], 1) self.assertEqual(records.mbstat["db-len"], 1111) self.assertEqual(records.mbstat["hsp-len"], 10) self.assertAlmostEqual(records.mbstat["eff-space"], 1212200.0) self.assertAlmostEqual(records.mbstat["kappa"], 0.710603) self.assertAlmostEqual(records.mbstat["lambda"], 1.37406) self.assertAlmostEqual(records.mbstat["entropy"], 1.30725) def check_megablast_legacy_record(self, record): self.assertEqual(record.num, 0) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "lcl|1_") self.assertEqual( record.query.description, "gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=1111)") self.assertEqual(repr(record), "") self.assertEqual(len(record), 1) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gnl|BL_ORD_ID|0") self.assertEqual(hit.target.name, "0") self.assertEqual( hit.target.description, "gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1111)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 788.0) self.assertAlmostEqual(hsp.annotations["bit score"], 1562.59) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 797) self.assertEqual(hsp.annotations["positive"], 797) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 797], [ 0, 797]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 797)) self.assertEqual( repr(hsp.query.seq), "Seq({0: 'CACTAGTACTCGAGCGTNCTGCACCAATTCGGCACGAGCAAGTGACTACGTTNT...GTG'}, length=1111)", ) self.assertEqual(hsp.query.id, "lcl|1_") self.assertEqual( hsp.query.description, "gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({0: 'CACTAGTACTCGAGCGTNCTGCACCAATTCGGCACGAGCAAGTGACTACGTTNT...GTG'}, length=1111)", ) self.assertEqual(hsp.target.id, "gnl|BL_ORD_ID|0") self.assertEqual(hsp.target.name, "0") self.assertEqual( hsp.target.description, "gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||", ) self.assertEqual( str(hsp), """\ Query : lcl|1_ Length: 1111 Strand: Plus gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence Target: gnl|BL_ORD_ID|0 Length: 1111 Strand: Plus gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence Score:1562 bits(788), Expect:0, Identities:797/797(100%), Positives:797/797(100%) gnl|BL_OR 0 CACTAGTACTCGAGCGTNCTGCACCAATTCGGCACGAGCAAGTGACTACGTTNTGTGAAC 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 0 CACTAGTACTCGAGCGTNCTGCACCAATTCGGCACGAGCAAGTGACTACGTTNTGTGAAC gnl|BL_OR 60 AGAAAATGGGGAGAGAAATGAAGTACTTGGCCATGAAAACTGATCAATTGGCCGTGGCTA 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 60 AGAAAATGGGGAGAGAAATGAAGTACTTGGCCATGAAAACTGATCAATTGGCCGTGGCTA gnl|BL_OR 120 ATATGATCGATTCCGATATCAATGAGCTTAAAATGGCAACAATGAGGCTCATCAATGATG 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 120 ATATGATCGATTCCGATATCAATGAGCTTAAAATGGCAACAATGAGGCTCATCAATGATG gnl|BL_OR 180 CTAGTATGCTCGGTCATTACGGGTTTGGCACTCATTTCCTCAAATGGCTCGCCTGCCTTG 180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 180 CTAGTATGCTCGGTCATTACGGGTTTGGCACTCATTTCCTCAAATGGCTCGCCTGCCTTG gnl|BL_OR 240 CGGCTATTTACTTGTTGATATTGGATCGAACAAACTGGAGAACCAACATGCTCACGTCAC 240 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 240 CGGCTATTTACTTGTTGATATTGGATCGAACAAACTGGAGAACCAACATGCTCACGTCAC gnl|BL_OR 300 TTTTAGTCCCTTACATATTCCTCAGTCTTCCATCCGGGCCATTTCATCTGTTCAGAGGCG 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 300 TTTTAGTCCCTTACATATTCCTCAGTCTTCCATCCGGGCCATTTCATCTGTTCAGAGGCG gnl|BL_OR 360 AGGTCGGGAAATGGATTGCCATCATTGCAGTCGTGTTAAGGCTGTTCTTCAACCGGCATT 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 360 AGGTCGGGAAATGGATTGCCATCATTGCAGTCGTGTTAAGGCTGTTCTTCAACCGGCATT gnl|BL_OR 420 TCCCAGTTTGGCTGGAAATGCCTGGATCGTTGATACTCCTCCTGGTGGTGGCACCAGACT 420 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 420 TCCCAGTTTGGCTGGAAATGCCTGGATCGTTGATACTCCTCCTGGTGGTGGCACCAGACT gnl|BL_OR 480 TCTTTACACACAAAGTGAAGGAGAGCTGGATCGGAATTGCAATTATGATAGCGATAGGGT 480 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 480 TCTTTACACACAAAGTGAAGGAGAGCTGGATCGGAATTGCAATTATGATAGCGATAGGGT gnl|BL_OR 540 GTCACCTGATGCAAGAACATATCAGAGCCACTGGTGGCTTTTGGAATTCCTTCACACAGA 540 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 540 GTCACCTGATGCAAGAACATATCAGAGCCACTGGTGGCTTTTGGAATTCCTTCACACAGA gnl|BL_OR 600 GCCACGGAACTTTTAACACAATTGGGCTTATCCTTCTACTGGCTTACCCTGTCTGTTTAT 600 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 600 GCCACGGAACTTTTAACACAATTGGGCTTATCCTTCTACTGGCTTACCCTGTCTGTTTAT gnl|BL_OR 660 GGTCATCTTCATGATGTAGTAGCTTAGTCTTGATCCTAATCCTCAAATNTACTTTTCCAG 660 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 660 GGTCATCTTCATGATGTAGTAGCTTAGTCTTGATCCTAATCCTCAAATNTACTTTTCCAG gnl|BL_OR 720 CTCTTTCGACGCTCTTGCTAAAGCCCATTCAATTCGCCCCATATTTCGCACACATTCATT 720 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| lcl|1_ 720 CTCTTTCGACGCTCTTGCTAAAGCCCATTCAATTCGCCCCATATTTCGCACACATTCATT gnl|BL_OR 780 TCACCACCCAATACGTG 797 780 ||||||||||||||||| 797 lcl|1_ 780 TCACCACCCAATACGTG 797 """, ) def test_megablast_legacy_writer(self): """Writing megablast 2.2.26 [Sep-21-2011] (megablast_legacy.xml).""" filename = "megablast_legacy.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_megablast_legacy_records(written_records) class TestBlastx(unittest.TestCase): """Test the Blast XML parser for blastx output.""" def test_xml_2222_blastx_001_parser(self): """Parsing BLASTX 2.2.22+ (xml_2222_blastx_001.xml).""" filename = "xml_2222_blastx_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_2222_blastx_001(records) with open(path, "rb") as stream: records = Blast.parse(stream) self.assertEqual( str(records), """\ Program: BLASTX 2.2.22+ db: nr Query: 1 (length=1002) gi|4104054|gb|AH007193.1|SEG_CVIGS Centaurea vallesiaca 18S ribosomal RNA gene, partial sequence Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 1 gi|149390769|gb|ABR25402.1| unknown [Oryza sativa (ind... Query: 2 (length=2050) gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 2 gi|4218936|gb|AAD12237.1| hevein-like protein HLPf [Sa... 1 2 gi|4206074|gb|AAD11408.1| hevein-like protein [Sambucu... 2 2 gi|4206070|gb|AAD11406.1| hevein-like protein [Sambucu... 3 2 gi|4206072|gb|AAD11407.1| hevein-like protein [Sambucu... 4 2 gi|16903131|gb|AAL30421.1|AF434174_1 hevein-like prote... 5 2 gi|16903133|gb|AAL30422.1|AF434175_1 hevein-like prote... 6 2 gi|30691147|gb|AAO17294.1| chitinase [Ficus carica] 7 2 gi|222139388|gb|ACM45713.1| class I chitinase [Pyrus p... 8 2 gi|23496435|dbj|BAB40817.2| endochitinase MCHT-2 [Cucu... 9 2 gi|82621253|gb|ABB86300.1| chitinase [Ficus awkeotsang] Query: 3 (length=550) gi|5690369|gb|AF158246.1|AF158246 Cricetulus griseus glucose phosphate isomerase (GPI) gene, partial intron sequence Hits: No hits found Query: 4 (length=655) gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 1 gi|166343825|gb|ABY86655.1| beta-tubulin 4 [Gossypium ... 1 1 gi|223549899|gb|EEF51386.1| tubulin beta chain, putati... 2 1 gi|18420724|ref|NP_568437.1| TUB8 (tubulin beta-8) [Ar... 3 1 gi|225426385|ref|XP_002271992.1| PREDICTED: hypothetic... 4 1 gi|15451226|gb|AAK96884.1| beta tubulin [Arabidopsis t... 5 1 gi|225470745|ref|XP_002267380.1| PREDICTED: hypothetic... 6 1 gi|586076|sp|P37392.1|TBB1_LUPAL RecName: Full=Tubulin... 7 1 gi|224104341|ref|XP_002313404.1| tubulin, beta chain [... 8 1 gi|223549679|gb|EEF51167.1| tubulin beta chain, putati... 9 1 gi|224058553|ref|XP_002299541.1| tubulin, beta chain [... Query: 5 (length=623) gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 2 gi|110740644|dbj|BAE98425.1| hypothetical protein [Ara... 1 1 gi|226453533|gb|EEH50844.1| predicted protein [Micromo... 2 1 gi|168069582|ref|XP_001786502.1| predicted protein [Ph... 3 1 gi|168068558|ref|XP_001786120.1| predicted protein [Ph... 4 1 gi|168068926|ref|XP_001786259.1| predicted protein [Ph... 5 1 gi|168070288|ref|XP_001786759.1| predicted protein [Ph... 6 1 gi|168068591|ref|XP_001786133.1| predicted protein [Ph... 7 1 gi|74622391|sp|Q8TGM5|ART3_YEAST Uncharacterized prote... 8 1 gi|168069944|ref|XP_001786634.1| predicted protein [Ph... 9 1 gi|50307717|ref|XP_453851.1| unnamed protein product [... Query: 6 (length=309) gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 1 gi|3176603|gb|AAC18749.1| phytochrome A [Lathyrus odor... 1 1 gi|130188|sp|P15001.1|PHYA_PEA RecName: Full=Phytochro... 2 1 gi|2499555|sp|P93673.1|PHYA_LATSA RecName: Full=Phytoc... 3 1 gi|3176595|gb|AAC18745.1| phytochrome A [Lennea melano... 4 1 gi|1711106|gb|AAC18675.1| phytochrome A [Sophora affinis] 5 1 gi|1711090|gb|AAC18670.1| phytochrome A [Myrospermum s... 6 1 gi|3176605|gb|AAC18750.1| phytochrome A [Hybosema robu... 7 1 gi|3176454|gb|AAC18668.1| phytochrome A [Cyclolobium n... 8 1 gi|3176523|gb|AAC18709.1| phytochrome A [Millettia ric... 9 1 gi|3176494|gb|AAC18693.1| phytochrome A [Callerya atro... Query: 7 (length=2551) gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 1 gi|27805603|sp|Q9TKP6.1|MATK_WISFR RecName: Full=Matur... 1 1 gi|171909144|gb|ACB58148.1| maturase K [Wisteria frute... 2 1 gi|171909146|gb|ACB58149.1| maturase K [Wisteria frute... 3 1 gi|171909132|gb|ACB58142.1| maturase K [Callerya megas... 4 1 gi|5817760|gb|AAD52903.1|AF142732_1 maturase-like prot... 5 1 gi|171909134|gb|ACB58143.1| maturase K [Wisteria brach... 6 1 gi|5817761|gb|AAD52904.1|AF142733_1 maturase-like prot... 7 1 gi|5817762|gb|AAD52905.1|AF142734_1 maturase-like prot... 8 1 gi|152014012|gb|ABS20107.1| maturase-like protein [Ast... 9 1 gi|146197442|dbj|BAF57483.1| maturase [Glycyrrhiza ura...""", ) def check_xml_2222_blastx_001(self, records): self.assertEqual(records.program, "blastx") self.assertEqual(records.version, "BLASTX 2.2.22+") self.assertEqual( records.reference, 'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.', ) self.assertEqual(records.db, "nr") self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "1") self.assertEqual( records.query.description, "gi|4104054|gb|AH007193.1|SEG_CVIGS Centaurea vallesiaca 18S ribosomal RNA gene, partial sequence", ) self.assertEqual(repr(records.query.seq), "Seq(None, length=1002)") self.assertEqual(len(records.param), 5) self.assertEqual(records.param["matrix"], "BLOSUM62") self.assertAlmostEqual(records.param["expect"], 0.0001) self.assertEqual(records.param["gap-open"], 11) self.assertEqual(records.param["gap-extend"], 1) self.assertEqual(records.param["filter"], "L;") record = next(records) self.assertEqual(record.num, 1) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "1") self.assertEqual( record.query.description, "gi|4104054|gb|AH007193.1|SEG_CVIGS Centaurea vallesiaca 18S ribosomal RNA gene, partial sequence", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=1002)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 8994603) self.assertEqual(record.stat["db-len"], -1216159329) self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 367397307882) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 1) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|149390769|gb|ABR25402.1|") self.assertEqual(hit.target.name, "ABR25402") self.assertEqual( hit.target.description, "unknown [Oryza sativa (indica cultivar-group)]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=26)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 129.0) self.assertAlmostEqual(hsp.annotations["bit score"], 54.2989775733826) self.assertAlmostEqual( hsp.annotations["evalue"], 1.83262460293058e-05, places=19 ) self.assertEqual(hsp.annotations["identity"], 24) self.assertEqual(hsp.annotations["positive"], 25) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 26], [ 0, 26]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 26)) self.assertEqual(repr(hsp.query.seq), "Seq('HMLVSKIKPCMCKYEQIQTVKLRMAH')") self.assertEqual(hsp.query.id, "1") self.assertEqual( hsp.query.description, "gi|4104054|gb|AH007193.1|SEG_CVIGS Centaurea vallesiaca 18S ribosomal RNA gene, partial sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(26))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "1:911..988") self.assertEqual(repr(hsp.target.seq), "Seq('HMLVSKIKPCMCKYELIRTVKLRMAH')") self.assertEqual(hsp.target.id, "gi|149390769|gb|ABR25402.1|") self.assertEqual(hsp.target.name, "ABR25402") self.assertEqual( hsp.target.description, "unknown [Oryza sativa (indica cultivar-group)]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "HMLVSKIKPCMCKYE I+TVKLRMAH") self.assertEqual( str(hsp), """\ Query : 1 Length: 26 Strand: Plus gi|4104054|gb|AH007193.1|SEG_CVIGS Centaurea vallesiaca 18S ribosomal RNA gene, partial sequence Target: gi|149390769|gb|ABR25402.1| Length: 26 Strand: Plus unknown [Oryza sativa (indica cultivar-group)] Score:54 bits(129), Expect:2e-05, Identities:24/26(92%), Positives:25/26(96%), Gaps:0.26(0%) gi|149390 0 HMLVSKIKPCMCKYELIRTVKLRMAH 26 0 |||||||||||||||.|.|||||||| 26 1 0 HMLVSKIKPCMCKYEQIQTVKLRMAH 26 """, ) record = next(records) self.assertEqual(record.num, 2) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "2") self.assertEqual( record.query.description, "gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=2050)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 8994603) self.assertEqual(record.stat["db-len"], -1216159329) self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 967993058520) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 10) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|4218936|gb|AAD12237.1|") self.assertEqual(hit.target.name, "AAD12237") self.assertEqual( hit.target.description, "hevein-like protein HLPf [Sambucus nigra]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=333)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1053.0) self.assertAlmostEqual(hsp.annotations["bit score"], 410.223385721017) self.assertAlmostEqual( hsp.annotations["evalue"], 3.48406066731465e-112, places=126 ) self.assertEqual(hsp.annotations["identity"], 199) self.assertEqual(hsp.annotations["positive"], 200) self.assertEqual(hsp.annotations["gaps"], 33) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 683.0) self.assertAlmostEqual(hsp.annotations["bit score"], 267.699542631596) self.assertAlmostEqual( hsp.annotations["evalue"], 2.79278546744412e-69, places=83 ) self.assertEqual(hsp.annotations["identity"], 127) self.assertEqual(hsp.annotations["positive"], 127) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[206, 333], [ 0, 127]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 127)) self.assertEqual( repr(hsp.query.seq), "Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV')", ) self.assertEqual(hsp.query.id, "2") self.assertEqual( hsp.query.description, "gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(127))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2049") self.assertEqual( repr(hsp.target.seq), "Seq({206: 'NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV'}, length=333)", ) self.assertEqual(hsp.target.id, "gi|4218936|gb|AAD12237.1|") self.assertEqual(hsp.target.name, "AAD12237") self.assertEqual( hsp.target.description, "hevein-like protein HLPf [Sambucus nigra]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASDQVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVAHIKMSVV", ) self.assertEqual( str(hsp), """\ Query : 2 Length: 127 Strand: Plus gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds Target: gi|4218936|gb|AAD12237.1| Length: 333 Strand: Plus hevein-like protein HLPf [Sambucus nigra] Score:267 bits(683), Expect:3e-69, Identities:127/127(100%), Positives:127/127(100%), Gaps:0.127(0%) gi|421893 206 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD gi|421893 266 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA gi|421893 326 HIKMSVV 333 120 ||||||| 127 2 120 HIKMSVV 127 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|4206074|gb|AAD11408.1|") self.assertEqual(hit.target.name, "AAD11408") self.assertEqual(hit.target.description, "hevein-like protein [Sambucus nigra]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=333)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1043.0) self.assertAlmostEqual(hsp.annotations["bit score"], 406.371389961843) self.assertAlmostEqual( hsp.annotations["evalue"], 5.03097287018806e-111, places=125 ) self.assertEqual(hsp.annotations["identity"], 198) self.assertEqual(hsp.annotations["positive"], 199) self.assertEqual(hsp.annotations["gaps"], 33) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 672.0) self.assertAlmostEqual(hsp.annotations["bit score"], 263.462347296505) self.assertAlmostEqual( hsp.annotations["evalue"], 5.26696544712228e-68, places=82 ) self.assertEqual(hsp.annotations["identity"], 125) self.assertEqual(hsp.annotations["positive"], 126) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[206, 333], [ 0, 127]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 127)) self.assertEqual( repr(hsp.query.seq), "Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV')", ) self.assertEqual(hsp.query.id, "2") self.assertEqual( hsp.query.description, "gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(127))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2049") self.assertEqual( repr(hsp.target.seq), "Seq({206: 'NYYYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SLV'}, length=333)", ) self.assertEqual(hsp.target.id, "gi|4206074|gb|AAD11408.1|") self.assertEqual(hsp.target.name, "AAD11408") self.assertEqual(hsp.target.description, "hevein-like protein [Sambucus nigra]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "NY YGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASDQVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVAHIKMS+V", ) self.assertEqual( str(hsp), """\ Query : 2 Length: 127 Strand: Plus gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds Target: gi|4206074|gb|AAD11408.1| Length: 333 Strand: Plus hevein-like protein [Sambucus nigra] Score:263 bits(672), Expect:5e-68, Identities:125/127(98%), Positives:126/127(99%), Gaps:0.127(0%) gi|420607 206 NYYYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD 0 ||.||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD gi|420607 266 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA gi|420607 326 HIKMSLV 333 120 |||||.| 127 2 120 HIKMSVV 127 """, ) hit = record[2] self.assertEqual(hit.num, 3) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|4206070|gb|AAD11406.1|") self.assertEqual(hit.target.name, "AAD11406") self.assertEqual(hit.target.description, "hevein-like protein [Sambucus nigra]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=333)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1043.0) self.assertAlmostEqual(hsp.annotations["bit score"], 406.371389961843) self.assertAlmostEqual( hsp.annotations["evalue"], 5.03097287018806e-111, places=125 ) self.assertEqual(hsp.annotations["identity"], 198) self.assertEqual(hsp.annotations["positive"], 199) self.assertEqual(hsp.annotations["gaps"], 33) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 680.0) self.assertAlmostEqual(hsp.annotations["bit score"], 266.543943903844) self.assertAlmostEqual( hsp.annotations["evalue"], 6.22167692942359e-69, places=83 ) self.assertEqual(hsp.annotations["identity"], 126) self.assertEqual(hsp.annotations["positive"], 127) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[206, 333], [ 0, 127]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 127)) self.assertEqual( repr(hsp.query.seq), "Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV')", ) self.assertEqual(hsp.query.id, "2") self.assertEqual( hsp.query.description, "gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(127))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2049") self.assertEqual( repr(hsp.target.seq), "Seq({206: 'NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SLV'}, length=333)", ) self.assertEqual(hsp.target.id, "gi|4206070|gb|AAD11406.1|") self.assertEqual(hsp.target.name, "AAD11406") self.assertEqual(hsp.target.description, "hevein-like protein [Sambucus nigra]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASDQVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVAHIKMS+V", ) self.assertEqual( str(hsp), """\ Query : 2 Length: 127 Strand: Plus gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds Target: gi|4206070|gb|AAD11406.1| Length: 333 Strand: Plus hevein-like protein [Sambucus nigra] Score:266 bits(680), Expect:6e-69, Identities:126/127(99%), Positives:127/127(100%), Gaps:0.127(0%) gi|420607 206 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD gi|420607 266 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA gi|420607 326 HIKMSLV 333 120 |||||.| 127 2 120 HIKMSVV 127 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|4206072|gb|AAD11407.1|") self.assertEqual(hit.target.name, "AAD11407") self.assertEqual(hit.target.description, "hevein-like protein [Sambucus nigra]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=333)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1016.0) self.assertAlmostEqual(hsp.annotations["bit score"], 395.971001412075) self.assertAlmostEqual( hsp.annotations["evalue"], 6.7995613312017e-108, places=121 ) self.assertEqual(hsp.annotations["identity"], 193) self.assertEqual(hsp.annotations["positive"], 195) self.assertEqual(hsp.annotations["gaps"], 33) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 646.0) self.assertAlmostEqual(hsp.annotations["bit score"], 253.447158322654) self.assertAlmostEqual( hsp.annotations["evalue"], 5.45045505347399e-65, places=79 ) self.assertEqual(hsp.annotations["identity"], 120) self.assertEqual(hsp.annotations["positive"], 124) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[206, 333], [ 0, 127]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 127)) self.assertEqual( repr(hsp.query.seq), "Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV')", ) self.assertEqual(hsp.query.id, "2") self.assertEqual( hsp.query.description, "gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(127))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2049") self.assertEqual( repr(hsp.target.seq), "Seq({206: 'NYYYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV'}, length=333)", ) self.assertEqual(hsp.target.id, "gi|4206072|gb|AAD11407.1|") self.assertEqual(hsp.target.name, "AAD11407") self.assertEqual(hsp.target.description, "hevein-like protein [Sambucus nigra]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "NY YGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASDQVPSYGVIS+II+SN GHQS LDTITTSIGYYKRYCDMLEVSYGDNL+NWFDETPF+KVAHIKMSVV", ) self.assertEqual( str(hsp), """\ Query : 2 Length: 127 Strand: Plus gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds Target: gi|4206072|gb|AAD11407.1| Length: 333 Strand: Plus hevein-like protein [Sambucus nigra] Score:253 bits(646), Expect:5e-65, Identities:120/127(94%), Positives:124/127(98%), Gaps:0.127(0%) gi|420607 206 NYYYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD 0 ||.||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD gi|420607 266 QVPSYGVISEIIDSNIGHQSSLDTITTSIGYYKRYCDMLEVSYGDNLKNWFDETPFSKVA 60 |||||||||.||.||.||||.||||||||||||||||||||||||||.||||||||.||| 2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA gi|420607 326 HIKMSVV 333 120 ||||||| 127 2 120 HIKMSVV 127 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|16903131|gb|AAL30421.1|AF434174_1") self.assertEqual(hit.target.name, "AAL30421") self.assertEqual(hit.target.description, "hevein-like protein [Sambucus nigra]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=330)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 986.0) self.assertAlmostEqual(hsp.annotations["bit score"], 384.415014134554) self.assertAlmostEqual( hsp.annotations["evalue"], 2.04729155722083e-104, places=118 ) self.assertEqual(hsp.annotations["identity"], 190) self.assertEqual(hsp.annotations["positive"], 191) self.assertEqual(hsp.annotations["gaps"], 36) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 679.0) self.assertAlmostEqual(hsp.annotations["bit score"], 266.158744327927) self.assertAlmostEqual( hsp.annotations["evalue"], 8.12576171382949e-69, places=83 ) self.assertEqual(hsp.annotations["identity"], 126) self.assertEqual(hsp.annotations["positive"], 126) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[203, 330], [ 0, 127]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 127)) self.assertEqual( repr(hsp.query.seq), "Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV')", ) self.assertEqual(hsp.query.id, "2") self.assertEqual( hsp.query.description, "gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(127))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2049") self.assertEqual( repr(hsp.target.seq), "Seq({203: 'NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV'}, length=330)", ) self.assertEqual(hsp.target.id, "gi|16903131|gb|AAL30421.1|AF434174_1") self.assertEqual(hsp.target.name, "AAL30421") self.assertEqual(hsp.target.description, "hevein-like protein [Sambucus nigra]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASDQVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKV HIKMSVV", ) self.assertEqual( str(hsp), """\ Query : 2 Length: 127 Strand: Plus gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds Target: gi|16903131|gb|AAL30421.1|AF434174_1 Length: 330 Strand: Plus hevein-like protein [Sambucus nigra] Score:266 bits(679), Expect:8e-69, Identities:126/127(99%), Positives:126/127(99%), Gaps:0.127(0%) gi|169031 203 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD gi|169031 263 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||. 2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA gi|169031 323 HIKMSVV 330 120 ||||||| 127 2 120 HIKMSVV 127 """, ) hit = record[5] self.assertEqual(hit.num, 6) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|16903133|gb|AAL30422.1|AF434175_1") self.assertEqual(hit.target.name, "AAL30422") self.assertEqual(hit.target.description, "hevein-like protein [Sambucus nigra]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=336)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 713.0) self.assertAlmostEqual(hsp.annotations["bit score"], 279.255529909117) self.assertAlmostEqual( hsp.annotations["evalue"], 9.27553088557319e-73, places=87 ) self.assertEqual(hsp.annotations["identity"], 148) self.assertEqual(hsp.annotations["positive"], 162) self.assertEqual(hsp.annotations["gaps"], 40) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 620.0) self.assertAlmostEqual(hsp.annotations["bit score"], 243.431969348803) self.assertAlmostEqual( hsp.annotations["evalue"], 5.64033703812707e-62, places=76 ) self.assertEqual(hsp.annotations["identity"], 115) self.assertEqual(hsp.annotations["positive"], 120) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[209, 336], [ 0, 127]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 127)) self.assertEqual( repr(hsp.query.seq), "Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV')", ) self.assertEqual(hsp.query.id, "2") self.assertEqual( hsp.query.description, "gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(127))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2049") self.assertEqual( repr(hsp.target.seq), "Seq({209: 'NYNYGLAGEAIGIDLVNDPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...HVV'}, length=336)", ) self.assertEqual(hsp.target.id, "gi|16903133|gb|AAL30422.1|AF434175_1") self.assertEqual(hsp.target.name, "AAL30422") self.assertEqual(hsp.target.description, "hevein-like protein [Sambucus nigra]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "NYNYGLAGEA+GIDLVN PDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINAN EASDQVPSYGV+S IINSN GH+SGLD ITTSIGYYKRYCDMLEVSYGDNL+NWFDETPF+KVA IKM VV", ) self.assertEqual( str(hsp), """\ Query : 2 Length: 127 Strand: Plus gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds Target: gi|16903133|gb|AAL30422.1|AF434175_1 Length: 336 Strand: Plus hevein-like protein [Sambucus nigra] Score:243 bits(620), Expect:6e-62, Identities:115/127(91%), Positives:120/127(94%), Gaps:0.127(0%) gi|169031 209 NYNYGLAGEAIGIDLVNDPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANYEASD 0 ||||||||||.||||||.|||||||||||||||||||||||||||||||||||||.|||| 2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD gi|169031 269 QVPSYGVLSNIINSNSGHKSGLDIITTSIGYYKRYCDMLEVSYGDNLKNWFDETPFSKVA 60 |||||||.|.|||||.||.||||.|||||||||||||||||||||||.||||||||.||| 2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA gi|169031 329 RIKMHVV 336 120 .|||.|| 127 2 120 HIKMSVV 127 """, ) hit = record[6] self.assertEqual(hit.num, 7) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|30691147|gb|AAO17294.1|") self.assertEqual(hit.target.name, "AAO17294") self.assertEqual(hit.target.description, "chitinase [Ficus carica]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=321)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 481.0) self.assertAlmostEqual(hsp.annotations["bit score"], 189.889228296291) self.assertAlmostEqual( hsp.annotations["evalue"], 7.40075731140555e-46, places=60 ) self.assertEqual(hsp.annotations["identity"], 113) self.assertEqual(hsp.annotations["positive"], 138) self.assertEqual(hsp.annotations["gaps"], 49) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 426.0) self.assertAlmostEqual(hsp.annotations["bit score"], 168.703251620836) self.assertAlmostEqual( hsp.annotations["evalue"], 1.76559312729927e-39, places=53 ) self.assertEqual(hsp.annotations["identity"], 81) self.assertEqual(hsp.annotations["positive"], 99) self.assertEqual(hsp.annotations["gaps"], 10) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[202, 261, 261, 266, 266, 308, 309, 320], [ 0, 59, 60, 65, 73, 115, 115, 126]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 127)) self.assertEqual( repr(hsp.query.seq), "Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...MSV')", ) self.assertEqual(hsp.query.id, "2") self.assertEqual( hsp.query.description, "gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(126))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2046") self.assertEqual( repr(hsp.target.seq), "Seq({202: 'NHNYGLVGEALGIDLINNPDLVATDPVVSFKTAIWFWMTRHQNKPSFHGVIINA...MPV'}, length=321)", ) self.assertEqual(hsp.target.id, "gi|30691147|gb|AAO17294.1|") self.assertEqual(hsp.target.name, "AAO17294") self.assertEqual(hsp.target.description, "chitinase [Ficus carica]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "N+NYGL GEALGIDL+N+PDLVATDP+VSFKTAIWFWMT+H N PS H ++INANSE S +P++ SNFG +S LD + SIGYYKRYCDML+VS+GDNL+ W+D TP F+ V+ I M V", ) self.assertEqual( str(hsp), """\ Query : 2 Length: 126 Strand: Plus gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds Target: gi|30691147|gb|AAO17294.1| Length: 321 Strand: Plus chitinase [Ficus carica] Score:168 bits(426), Expect:2e-39, Identities:81/127(64%), Positives:99/127(78%), Gaps:10.127(8%) gi|306911 202 NHNYGLVGEALGIDLINNPDLVATDPVVSFKTAIWFWMTRHQNKPSFHGVIINANSEPS- 0 |.||||.||||||||.|.||||||||.||||||||||||.|.|.||.|...||||||.|- 2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD gi|306911 261 HIPNH--------SNFGQESVLDVVNRSIGYYKRYCDMLKVSFGDNLKYWYDGTPNFSDV 60 ..|..--------||||..|.||....||||||||||||.||.||||..|.|.||-|..| 2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETP-FTKV gi|306911 313 SRIGMPV 320 120 ..|.|.| 127 2 119 AHIKMSV 126 """, ) hit = record[7] self.assertEqual(hit.num, 8) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|222139388|gb|ACM45713.1|") self.assertEqual(hit.target.name, "ACM45713") self.assertEqual(hit.target.description, "class I chitinase [Pyrus pyrifolia]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=317)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 469.0) self.assertAlmostEqual(hsp.annotations["bit score"], 185.266833385283) self.assertAlmostEqual( hsp.annotations["evalue"], 1.82286993845418e-44, places=58 ) self.assertEqual(hsp.annotations["identity"], 111) self.assertEqual(hsp.annotations["positive"], 137) self.assertEqual(hsp.annotations["gaps"], 50) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 318.0) self.assertAlmostEqual(hsp.annotations["bit score"], 127.101697421762) self.assertAlmostEqual( hsp.annotations["evalue"], 5.89123449548921e-27, places=40 ) self.assertEqual(hsp.annotations["identity"], 62) self.assertEqual(hsp.annotations["positive"], 84) self.assertEqual(hsp.annotations["gaps"], 7) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[195, 247, 252, 284, 285, 309, 309, 316], [ 0, 52, 52, 84, 84, 108, 109, 116]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 122)) self.assertEqual( repr(hsp.query.seq), "Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...TPF')", ) self.assertEqual(hsp.query.id, "2") self.assertEqual( hsp.query.description, "gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(116))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2016") self.assertEqual( repr(hsp.target.seq), "Seq({195: 'NYNYGQAGKAIGKDLINNPDLVATDPVVSFKTAIWFWMTPQGNKPSSHDVITGR...RPF'}, length=317)", ) self.assertEqual(hsp.target.id, "gi|222139388|gb|ACM45713.1|") self.assertEqual(hsp.target.name, "ACM45713") self.assertEqual(hsp.target.description, "class I chitinase [Pyrus pyrifolia]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "NYNYG AG+A+G DL+N+PDLVATDP+VSFKTAIWFWMT N PS HD++ + ++ +VP YGVI+ IIN G D + + IG+Y+RYC +L V+ GDNL+ +++ PF", ) self.assertEqual( str(hsp), """\ Query : 2 Length: 116 Strand: Plus gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds Target: gi|222139388|gb|ACM45713.1| Length: 317 Strand: Plus class I chitinase [Pyrus pyrifolia] Score:127 bits(318), Expect:6e-27, Identities:62/122(51%), Positives:84/122(69%), Gaps:7.122(6%) gi|222139 195 NYNYGQAGKAIGKDLINNPDLVATDPVVSFKTAIWFWMTPQGNKPSSHDVITGRWSPSTA 0 |||||.||.|.|.||.|.||||||||.||||||||||||...|.||.||...-----... 2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILI-----NAN gi|222139 255 DRSAGRVPGYGVITNIINGGVECGKGQDARVASRIGFYRRYCQILGVNPGDNLD-CYNQR 60 ......||.||||..|||.......|.|.-....||.|.|||..|.|..||||.-..... 2 55 SEASDQVPSYGVISKIINSNFGHQSGLDT-ITTSIGYYKRYCDMLEVSYGDNLENWFDET gi|222139 314 PF 316 120 || 122 2 114 PF 116 """, ) hit = record[8] self.assertEqual(hit.num, 9) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|23496435|dbj|BAB40817.2|") self.assertEqual(hit.target.name, "BAB40817") self.assertEqual(hit.target.description, "endochitinase MCHT-2 [Cucumis melo]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=311)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 460.0) self.assertAlmostEqual(hsp.annotations["bit score"], 181.800037202027) self.assertAlmostEqual( hsp.annotations["evalue"], 2.01541888137674e-43, places=57 ) self.assertEqual(hsp.annotations["identity"], 109) self.assertEqual(hsp.annotations["positive"], 132) self.assertEqual(hsp.annotations["gaps"], 54) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 285.0) self.assertAlmostEqual(hsp.annotations["bit score"], 114.39011141649) self.assertAlmostEqual( hsp.annotations["evalue"], 3.95161831690075e-23, places=37 ) self.assertEqual(hsp.annotations["identity"], 56) self.assertEqual(hsp.annotations["positive"], 75) self.assertEqual(hsp.annotations["gaps"], 7) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[191, 211, 211, 242, 247, 277, 278, 304], [ 0, 20, 21, 52, 52, 82, 82, 108]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 114)) self.assertEqual( repr(hsp.query.seq), "Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...NLE')", ) self.assertEqual(hsp.query.id, "2") self.assertEqual( hsp.query.description, "gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(108))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "2:1669..1992") self.assertEqual( repr(hsp.target.seq), "Seq({191: 'NYNYGPAGKAIGAPLLTNPDTATDPVTSFKTALWFWMTAQGNKPSCHNVITGNW...NLD'}, length=311)", ) self.assertEqual(hsp.target.id, "gi|23496435|dbj|BAB40817.2|") self.assertEqual(hsp.target.name, "BAB40817") self.assertEqual(hsp.target.description, "endochitinase MCHT-2 [Cucumis melo]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "NYNYG AG+A+G L+ +PD ATDP+ SFKTA+WFWMT N PS H+++ ++ A+ +VP YGVI+ IIN G D + IG+YKRYCDML + YG+NL+", ) self.assertEqual( str(hsp), """\ Query : 2 Length: 108 Strand: Plus gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds Target: gi|23496435|dbj|BAB40817.2| Length: 311 Strand: Plus endochitinase MCHT-2 [Cucumis melo] Score:114 bits(285), Expect:4e-23, Identities:56/114(49%), Positives:75/114(66%), Gaps:7.114(6%) gi|234964 191 NYNYGPAGKAIGAPLLTNPD-TATDPVTSFKTALWFWMTAQGNKPSCHNVITGNWQPSSA 0 |||||.||.|.|..|...||-.||||..|||||.|||||...|.||.|....-----... 2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILI-----NAN gi|234964 250 DNAAGRVPGYGVITNIINGGLECGRGPDDRVKDRIGFYKRYCDMLGIGYGNNLD 304 60 ..|...||.||||..|||.......|.-|.....||.||||||||...||.||. 114 2 55 SEASDQVPSYGVISKIINSNFGHQSGL-DTITTSIGYYKRYCDMLEVSYGDNLE 108 """, ) hit = record[9] self.assertEqual(hit.num, 10) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|82621253|gb|ABB86300.1|") self.assertEqual(hit.target.name, "ABB86300") self.assertEqual(hit.target.description, "chitinase [Ficus awkeotsang]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=301)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 459.0) self.assertAlmostEqual(hsp.annotations["bit score"], 181.414837626109) self.assertAlmostEqual( hsp.annotations["evalue"], 2.6322185753765e-43, places=56 ) self.assertEqual(hsp.annotations["identity"], 114) self.assertEqual(hsp.annotations["positive"], 134) self.assertEqual(hsp.annotations["gaps"], 50) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 359.0) self.assertAlmostEqual(hsp.annotations["bit score"], 142.894880034374) self.assertAlmostEqual( hsp.annotations["evalue"], 1.03749166509001e-31, places=45 ) self.assertEqual(hsp.annotations["identity"], 67) self.assertEqual(hsp.annotations["positive"], 83) self.assertEqual(hsp.annotations["gaps"], 9) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[203, 263, 263, 268, 268, 301], [ 0, 60, 61, 66, 74, 107]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 107)) self.assertEqual( repr(hsp.query.seq), "Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...DNL')", ) self.assertEqual(hsp.query.id, "2") self.assertEqual( hsp.query.description, "gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(107))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "2:1669..1989") self.assertEqual( repr(hsp.target.seq), "Seq({203: 'NHNYGLVGEALGIDLINNPELVATDPVISFKTAIWFWMARYEDKPSFHDVIINA...DNL'}, length=301)", ) self.assertEqual(hsp.target.id, "gi|82621253|gb|ABB86300.1|") self.assertEqual(hsp.target.name, "ABB86300") self.assertEqual(hsp.target.description, "chitinase [Ficus awkeotsang]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "N+NYGL GEALGIDL+N+P+LVATDP++SFKTAIWFWM ++++ PS HD++INAN EASD +P +G N G +S LD + SIGYYKRYCDML VS DNL", ) self.assertEqual( str(hsp), """\ Query : 2 Length: 107 Strand: Plus gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds Target: gi|82621253|gb|ABB86300.1| Length: 301 Strand: Plus chitinase [Ficus awkeotsang] Score:142 bits(359), Expect:1e-31, Identities:67/107(63%), Positives:83/107(78%), Gaps:9.107(8%) gi|826212 203 NHNYGLVGEALGIDLINNPELVATDPVISFKTAIWFWMARYEDKPSFHDVIINANFEASD 0 |.||||.||||||||.|.|.||||||..||||||||||......||.||..||||.|||| 2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD gi|826212 263 -IPYHG--------NSGQESSLDVVNRSIGYYKRYCDMLGVSCEDNL 301 60 -.|..|--------|.|..|.||....||||||||||||.||..||| 107 2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNL 107 """, ) record = next(records) self.assertEqual(record.num, 3) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "3") self.assertEqual( record.query.description, "gi|5690369|gb|AF158246.1|AF158246 Cricetulus griseus glucose phosphate isomerase (GPI) gene, partial intron sequence", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=550)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 8994603) self.assertEqual(record.stat["db-len"], -1216159329) self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 108443629616) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 0) record = next(records) self.assertEqual(record.num, 4) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "4") self.assertEqual( record.query.description, "gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=655)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 8994603) self.assertEqual(record.stat["db-len"], -1216159329) self.assertEqual(record.stat["hsp-len"], 0) self.assertAlmostEqual(record.stat["eff-space"], 165344802738) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 10) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|166343825|gb|ABY86655.1|") self.assertEqual(hit.target.name, "ABY86655") self.assertEqual(hit.target.description, "beta-tubulin 4 [Gossypium hirsutum]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=448)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1048.0) self.assertAlmostEqual(hsp.annotations["bit score"], 408.29738784143) self.assertAlmostEqual( hsp.annotations["evalue"], 2.26145081918239e-112, places=126 ) self.assertEqual(hsp.annotations["identity"], 196) self.assertEqual(hsp.annotations["positive"], 197) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 201], [ 0, 201]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 201)) self.assertEqual( repr(hsp.query.seq), "Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')", ) self.assertEqual(hsp.query.id, "4") self.assertEqual( hsp.query.description, "gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(201))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "4:50..652") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MREILHIQAGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDNDLQLERVNVYYNEA...CMV'}, length=448)", ) self.assertEqual(hsp.target.id, "gi|166343825|gb|ABY86655.1|") self.assertEqual(hsp.target.name, "ABY86655") self.assertEqual(hsp.target.description, "beta-tubulin 4 [Gossypium hirsutum]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MREILHIQAGQCGNQIGA FWEVVCAEHGI+STGRYQGDNDLQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV", ) self.assertEqual( str(hsp), """\ Query : 4 Length: 201 Strand: Plus gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence Target: gi|166343825|gb|ABY86655.1| Length: 448 Strand: Plus beta-tubulin 4 [Gossypium hirsutum] Score:408 bits(1048), Expect:2e-112, Identities:196/201(98%), Positives:197/201(98%), Gaps:0.201(0%) gi|166343 0 MREILHIQAGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDNDLQLERVNVYYNEASCGRFV 0 ||||||||||||||||||.|||||||||||.||||||||||||||||||||||||||||| 4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV gi|166343 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||| 4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV gi|166343 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV 120 ||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||| 4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV gi|166343 180 EPYNATLSVHQLVENADECMV 201 180 ||||||||||.|||||||||| 201 4 180 EPYNATLSVHXLVENADECMV 201 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|223549899|gb|EEF51386.1|") self.assertEqual(hit.target.name, "EEF51386") self.assertEqual( hit.target.description, "tubulin beta chain, putative [Ricinus communis]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=448)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1044.0) self.assertAlmostEqual(hsp.annotations["bit score"], 406.756589537761) self.assertAlmostEqual( hsp.annotations["evalue"], 6.57981456092236e-112, places=126 ) self.assertEqual(hsp.annotations["identity"], 195) self.assertEqual(hsp.annotations["positive"], 196) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 201], [ 0, 201]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 201)) self.assertEqual( repr(hsp.query.seq), "Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')", ) self.assertEqual(hsp.query.id, "4") self.assertEqual( hsp.query.description, "gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(201))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "4:50..652") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDNDLQLERVNVYYNEA...CMV'}, length=448)", ) self.assertEqual(hsp.target.id, "gi|223549899|gb|EEF51386.1|") self.assertEqual(hsp.target.name, "EEF51386") self.assertEqual( hsp.target.description, "tubulin beta chain, putative [Ricinus communis]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRYQGDNDLQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV", ) self.assertEqual( str(hsp), """\ Query : 4 Length: 201 Strand: Plus gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence Target: gi|223549899|gb|EEF51386.1| Length: 448 Strand: Plus tubulin beta chain, putative [Ricinus communis] Score:406 bits(1044), Expect:7e-112, Identities:195/201(97%), Positives:196/201(98%), Gaps:0.201(0%) gi|223549 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDNDLQLERVNVYYNEASCGRFV 0 ||||||||.|||||||||.|||||||||||.||||||||||||||||||||||||||||| 4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV gi|223549 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||| 4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV gi|223549 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV 120 ||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||| 4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV gi|223549 180 EPYNATLSVHQLVENADECMV 201 180 ||||||||||.|||||||||| 201 4 180 EPYNATLSVHXLVENADECMV 201 """, ) hit = record[2] self.assertEqual(hit.num, 3) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|18420724|ref|NP_568437.1|") self.assertEqual(hit.target.name, "NP_568437") self.assertEqual( hit.target.description, "TUB8 (tubulin beta-8) [Arabidopsis thaliana] >gi|27735261|sp|P29516.2|TBB8_ARATH RecName: Full=Tubulin beta-8 chain; AltName: Full=Beta-8-tubulin >gi|10176853|dbj|BAB10059.1| beta tubulin [Arabidopsis thaliana]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=449)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1040.0) self.assertAlmostEqual(hsp.annotations["bit score"], 405.215791234091) self.assertAlmostEqual( hsp.annotations["evalue"], 1.91443295113426e-111, places=125 ) self.assertEqual(hsp.annotations["identity"], 194) self.assertEqual(hsp.annotations["positive"], 196) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 201], [ 0, 201]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 201)) self.assertEqual( repr(hsp.query.seq), "Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')", ) self.assertEqual(hsp.query.id, "4") self.assertEqual( hsp.query.description, "gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(201))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "4:50..652") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGENDLQLERVNVYYNEA...CMV'}, length=449)", ) self.assertEqual(hsp.target.id, "gi|18420724|ref|NP_568437.1|") self.assertEqual(hsp.target.name, "NP_568437") self.assertEqual( hsp.target.description, "TUB8 (tubulin beta-8) [Arabidopsis thaliana] >gi|27735261|sp|P29516.2|TBB8_ARATH RecName: Full=Tubulin beta-8 chain; AltName: Full=Beta-8-tubulin >gi|10176853|dbj|BAB10059.1| beta tubulin [Arabidopsis thaliana]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRYQG+NDLQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV", ) self.assertEqual( str(hsp), """\ Query : 4 Length: 201 Strand: Plus gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence Target: gi|18420724|ref|NP_568437.1| Length: 449 Strand: Plus TUB8 (tubulin beta-8) [Arabidopsis thaliana] >gi|27735261|sp|P29516.2|TBB8_ARATH RecName: Full=Tubulin beta-8 chain; AltName: Full=Beta-8-tubulin >gi|10176853|dbj|BAB10059.1| beta tubulin [Arabidopsis thaliana] Score:405 bits(1040), Expect:2e-111, Identities:194/201(97%), Positives:196/201(98%), Gaps:0.201(0%) gi|184207 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGENDLQLERVNVYYNEASCGRFV 0 ||||||||.|||||||||.|||||||||||.|||||||.||||||||||||||||||||| 4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV gi|184207 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||| 4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV gi|184207 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV 120 ||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||| 4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV gi|184207 180 EPYNATLSVHQLVENADECMV 201 180 ||||||||||.|||||||||| 201 4 180 EPYNATLSVHXLVENADECMV 201 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|225426385|ref|XP_002271992.1|") self.assertEqual(hit.target.name, "XP_002271992") self.assertEqual( hit.target.description, "PREDICTED: hypothetical protein [Vitis vinifera] >gi|157356601|emb|CAO62796.1| unnamed protein product [Vitis vinifera]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=447)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1034.0) self.assertAlmostEqual(hsp.annotations["bit score"], 402.904593778587) self.assertAlmostEqual( hsp.annotations["evalue"], 9.50123195540709e-111, places=125 ) self.assertEqual(hsp.annotations["identity"], 193) self.assertEqual(hsp.annotations["positive"], 195) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 201], [ 0, 201]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 201)) self.assertEqual( repr(hsp.query.seq), "Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')", ) self.assertEqual(hsp.query.id, "4") self.assertEqual( hsp.query.description, "gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(201))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "4:50..652") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYHGDSDLQLERVNVYYNEA...CMV'}, length=447)", ) self.assertEqual(hsp.target.id, "gi|225426385|ref|XP_002271992.1|") self.assertEqual(hsp.target.name, "XP_002271992") self.assertEqual( hsp.target.description, "PREDICTED: hypothetical protein [Vitis vinifera] >gi|157356601|emb|CAO62796.1| unnamed protein product [Vitis vinifera]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRY GD+DLQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV", ) self.assertEqual( str(hsp), """\ Query : 4 Length: 201 Strand: Plus gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence Target: gi|225426385|ref|XP_002271992.1| Length: 447 Strand: Plus PREDICTED: hypothetical protein [Vitis vinifera] >gi|157356601|emb|CAO62796.1| unnamed protein product [Vitis vinifera] Score:402 bits(1034), Expect:1e-110, Identities:193/201(96%), Positives:195/201(97%), Gaps:0.201(0%) gi|225426 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYHGDSDLQLERVNVYYNEASCGRFV 0 ||||||||.|||||||||.|||||||||||.|||||.||.|||||||||||||||||||| 4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV gi|225426 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||| 4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV gi|225426 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV 120 ||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||| 4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV gi|225426 180 EPYNATLSVHQLVENADECMV 201 180 ||||||||||.|||||||||| 201 4 180 EPYNATLSVHXLVENADECMV 201 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|15451226|gb|AAK96884.1|") self.assertEqual(hit.target.name, "AAK96884") self.assertEqual( hit.target.description, "beta tubulin [Arabidopsis thaliana] >gi|20148289|gb|AAM10035.1| beta tubulin [Arabidopsis thaliana]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=449)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1034.0) self.assertAlmostEqual(hsp.annotations["bit score"], 402.904593778587) self.assertAlmostEqual( hsp.annotations["evalue"], 9.50123195540709e-111, places=125 ) self.assertEqual(hsp.annotations["identity"], 193) self.assertEqual(hsp.annotations["positive"], 195) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 201], [ 0, 201]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 201)) self.assertEqual( repr(hsp.query.seq), "Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')", ) self.assertEqual(hsp.query.id, "4") self.assertEqual( hsp.query.description, "gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(201))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "4:50..652") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGEKDLQLERVNVYYNEA...CMV'}, length=449)", ) self.assertEqual(hsp.target.id, "gi|15451226|gb|AAK96884.1|") self.assertEqual(hsp.target.name, "AAK96884") self.assertEqual( hsp.target.description, "beta tubulin [Arabidopsis thaliana] >gi|20148289|gb|AAM10035.1| beta tubulin [Arabidopsis thaliana]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRYQG+ DLQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV", ) self.assertEqual( str(hsp), """\ Query : 4 Length: 201 Strand: Plus gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence Target: gi|15451226|gb|AAK96884.1| Length: 449 Strand: Plus beta tubulin [Arabidopsis thaliana] >gi|20148289|gb|AAM10035.1| beta tubulin [Arabidopsis thaliana] Score:402 bits(1034), Expect:1e-110, Identities:193/201(96%), Positives:195/201(97%), Gaps:0.201(0%) gi|154512 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGEKDLQLERVNVYYNEASCGRFV 0 ||||||||.|||||||||.|||||||||||.|||||||..|||||||||||||||||||| 4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV gi|154512 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||| 4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV gi|154512 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV 120 ||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||| 4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV gi|154512 180 EPYNATLSVHQLVENADECMV 201 180 ||||||||||.|||||||||| 201 4 180 EPYNATLSVHXLVENADECMV 201 """, ) hit = record[5] self.assertEqual(hit.num, 6) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|225470745|ref|XP_002267380.1|") self.assertEqual(hit.target.name, "XP_002267380") self.assertEqual( hit.target.description, "PREDICTED: hypothetical protein [Vitis vinifera] >gi|157327486|emb|CAO15467.1| unnamed protein product [Vitis vinifera]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=449)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1033.0) self.assertAlmostEqual(hsp.annotations["bit score"], 402.51939420267) self.assertAlmostEqual( hsp.annotations["evalue"], 1.24089932237309e-110, places=124 ) self.assertEqual(hsp.annotations["identity"], 192) self.assertEqual(hsp.annotations["positive"], 195) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 201], [ 0, 201]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 201)) self.assertEqual( repr(hsp.query.seq), "Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')", ) self.assertEqual(hsp.query.id, "4") self.assertEqual( hsp.query.description, "gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(201))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "4:50..652") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MREILHVQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDTELQLERVNVYYNEA...CMV'}, length=449)", ) self.assertEqual(hsp.target.id, "gi|225470745|ref|XP_002267380.1|") self.assertEqual(hsp.target.name, "XP_002267380") self.assertEqual( hsp.target.description, "PREDICTED: hypothetical protein [Vitis vinifera] >gi|157327486|emb|CAO15467.1| unnamed protein product [Vitis vinifera]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MREILH+Q GQCGNQIGA FWEVVCAEHGI+STGRYQGD +LQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV", ) self.assertEqual( str(hsp), """\ Query : 4 Length: 201 Strand: Plus gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence Target: gi|225470745|ref|XP_002267380.1| Length: 449 Strand: Plus PREDICTED: hypothetical protein [Vitis vinifera] >gi|157327486|emb|CAO15467.1| unnamed protein product [Vitis vinifera] Score:402 bits(1033), Expect:1e-110, Identities:192/201(96%), Positives:195/201(97%), Gaps:0.201(0%) gi|225470 0 MREILHVQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDTELQLERVNVYYNEASCGRFV 0 ||||||.|.|||||||||.|||||||||||.||||||||..||||||||||||||||||| 4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV gi|225470 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||| 4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV gi|225470 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV 120 ||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||| 4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV gi|225470 180 EPYNATLSVHQLVENADECMV 201 180 ||||||||||.|||||||||| 201 4 180 EPYNATLSVHXLVENADECMV 201 """, ) hit = record[6] self.assertEqual(hit.num, 7) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|586076|sp|P37392.1|TBB1_LUPAL") self.assertEqual(hit.target.name, "P37392") self.assertEqual( hit.target.description, "RecName: Full=Tubulin beta-1 chain; AltName: Full=Beta-1-tubulin >gi|402636|emb|CAA49736.1| Beta tubulin 1 [Lupinus albus]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=447)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1033.0) self.assertAlmostEqual(hsp.annotations["bit score"], 402.51939420267) self.assertAlmostEqual( hsp.annotations["evalue"], 1.24089932237309e-110, places=124 ) self.assertEqual(hsp.annotations["identity"], 193) self.assertEqual(hsp.annotations["positive"], 195) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 201], [ 0, 201]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 201)) self.assertEqual( repr(hsp.query.seq), "Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')", ) self.assertEqual(hsp.query.id, "4") self.assertEqual( hsp.query.description, "gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(201))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "4:50..652") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYGGDNELQLERVNVYYNEA...CMV'}, length=447)", ) self.assertEqual(hsp.target.id, "gi|586076|sp|P37392.1|TBB1_LUPAL") self.assertEqual(hsp.target.name, "P37392") self.assertEqual( hsp.target.description, "RecName: Full=Tubulin beta-1 chain; AltName: Full=Beta-1-tubulin >gi|402636|emb|CAA49736.1| Beta tubulin 1 [Lupinus albus]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRY GDN+LQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV", ) self.assertEqual( str(hsp), """\ Query : 4 Length: 201 Strand: Plus gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence Target: gi|586076|sp|P37392.1|TBB1_LUPAL Length: 447 Strand: Plus RecName: Full=Tubulin beta-1 chain; AltName: Full=Beta-1-tubulin >gi|402636|emb|CAA49736.1| Beta tubulin 1 [Lupinus albus] Score:402 bits(1033), Expect:1e-110, Identities:193/201(96%), Positives:195/201(97%), Gaps:0.201(0%) gi|586076 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYGGDNELQLERVNVYYNEASCGRFV 0 ||||||||.|||||||||.|||||||||||.|||||.|||.||||||||||||||||||| 4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV gi|586076 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||| 4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV gi|586076 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV 120 ||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||| 4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV gi|586076 180 EPYNATLSVHQLVENADECMV 201 180 ||||||||||.|||||||||| 201 4 180 EPYNATLSVHXLVENADECMV 201 """, ) hit = record[7] self.assertEqual(hit.num, 8) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|224104341|ref|XP_002313404.1|") self.assertEqual(hit.target.name, "XP_002313404") self.assertEqual( hit.target.description, "tubulin, beta chain [Populus trichocarpa] >gi|222849812|gb|EEE87359.1| tubulin, beta chain [Populus trichocarpa]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=451)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1031.0) self.assertAlmostEqual(hsp.annotations["bit score"], 401.748995050835) self.assertAlmostEqual( hsp.annotations["evalue"], 2.1166536544662e-110, places=123 ) self.assertEqual(hsp.annotations["identity"], 192) self.assertEqual(hsp.annotations["positive"], 195) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 201], [ 0, 201]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 201)) self.assertEqual( repr(hsp.query.seq), "Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')", ) self.assertEqual(hsp.query.id, "4") self.assertEqual( hsp.query.description, "gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(201))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "4:50..652") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDSPLQLERINVYYNEA...CMV'}, length=451)", ) self.assertEqual(hsp.target.id, "gi|224104341|ref|XP_002313404.1|") self.assertEqual(hsp.target.name, "XP_002313404") self.assertEqual( hsp.target.description, "tubulin, beta chain [Populus trichocarpa] >gi|222849812|gb|EEE87359.1| tubulin, beta chain [Populus trichocarpa]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRYQGD+ LQLER+NVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV", ) self.assertEqual( str(hsp), """\ Query : 4 Length: 201 Strand: Plus gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence Target: gi|224104341|ref|XP_002313404.1| Length: 451 Strand: Plus tubulin, beta chain [Populus trichocarpa] >gi|222849812|gb|EEE87359.1| tubulin, beta chain [Populus trichocarpa] Score:401 bits(1031), Expect:2e-110, Identities:192/201(96%), Positives:195/201(97%), Gaps:0.201(0%) gi|224104 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDSPLQLERINVYYNEASCGRFV 0 ||||||||.|||||||||.|||||||||||.||||||||..|||||.||||||||||||| 4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV gi|224104 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||| 4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV gi|224104 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV 120 ||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||| 4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV gi|224104 180 EPYNATLSVHQLVENADECMV 201 180 ||||||||||.|||||||||| 201 4 180 EPYNATLSVHXLVENADECMV 201 """, ) hit = record[8] self.assertEqual(hit.num, 9) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|223549679|gb|EEF51167.1|") self.assertEqual(hit.target.name, "EEF51167") self.assertEqual( hit.target.description, "tubulin beta chain, putative [Ricinus communis]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=446)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1029.0) self.assertAlmostEqual(hsp.annotations["bit score"], 400.978595899) self.assertAlmostEqual( hsp.annotations["evalue"], 3.61046429165375e-110, places=124 ) self.assertEqual(hsp.annotations["identity"], 191) self.assertEqual(hsp.annotations["positive"], 194) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 201], [ 0, 201]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 201)) self.assertEqual( repr(hsp.query.seq), "Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')", ) self.assertEqual(hsp.query.id, "4") self.assertEqual( hsp.query.description, "gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(201))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "4:50..652") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MREILHVQGGQCGNQIGAKFWEVVCAEHGIDSTGRYHGDTDLQLERVNVYYNEA...CMV'}, length=446)", ) self.assertEqual(hsp.target.id, "gi|223549679|gb|EEF51167.1|") self.assertEqual(hsp.target.name, "EEF51167") self.assertEqual( hsp.target.description, "tubulin beta chain, putative [Ricinus communis]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MREILH+Q GQCGNQIGA FWEVVCAEHGI+STGRY GD DLQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISK+REEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV", ) self.assertEqual( str(hsp), """\ Query : 4 Length: 201 Strand: Plus gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence Target: gi|223549679|gb|EEF51167.1| Length: 446 Strand: Plus tubulin beta chain, putative [Ricinus communis] Score:400 bits(1029), Expect:4e-110, Identities:191/201(95%), Positives:194/201(97%), Gaps:0.201(0%) gi|223549 0 MREILHVQGGQCGNQIGAKFWEVVCAEHGIDSTGRYHGDTDLQLERVNVYYNEASCGRFV 0 ||||||.|.|||||||||.|||||||||||.|||||.||.|||||||||||||||||||| 4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV gi|223549 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||| 4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV gi|223549 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKMREEYPDRMMLTFSVFPSPKVSDTVV 120 ||||||||||||||||||||.|||||||||||||.||||||||||||||||||||||||| 4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV gi|223549 180 EPYNATLSVHQLVENADECMV 201 180 ||||||||||.|||||||||| 201 4 180 EPYNATLSVHXLVENADECMV 201 """, ) hit = record[9] self.assertEqual(hit.num, 10) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|224058553|ref|XP_002299541.1|") self.assertEqual(hit.target.name, "XP_002299541") self.assertEqual( hit.target.description, "tubulin, beta chain [Populus trichocarpa] >gi|222846799|gb|EEE84346.1| tubulin, beta chain [Populus trichocarpa]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=447)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1029.0) self.assertAlmostEqual(hsp.annotations["bit score"], 400.978595899) self.assertAlmostEqual( hsp.annotations["evalue"], 3.61046429165375e-110, places=124 ) self.assertEqual(hsp.annotations["identity"], 192) self.assertEqual(hsp.annotations["positive"], 195) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 201], [ 0, 201]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 201)) self.assertEqual( repr(hsp.query.seq), "Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')", ) self.assertEqual(hsp.query.id, "4") self.assertEqual( hsp.query.description, "gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(201))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "4:50..652") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDSALQIERVNVYYNEA...CMV'}, length=447)", ) self.assertEqual(hsp.target.id, "gi|224058553|ref|XP_002299541.1|") self.assertEqual(hsp.target.name, "XP_002299541") self.assertEqual( hsp.target.description, "tubulin, beta chain [Populus trichocarpa] >gi|222846799|gb|EEE84346.1| tubulin, beta chain [Populus trichocarpa]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRYQGD+ LQ+ERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV", ) self.assertEqual( str(hsp), """\ Query : 4 Length: 201 Strand: Plus gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence Target: gi|224058553|ref|XP_002299541.1| Length: 447 Strand: Plus tubulin, beta chain [Populus trichocarpa] >gi|222846799|gb|EEE84346.1| tubulin, beta chain [Populus trichocarpa] Score:400 bits(1029), Expect:4e-110, Identities:192/201(96%), Positives:195/201(97%), Gaps:0.201(0%) gi|224058 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDSALQIERVNVYYNEASCGRFV 0 ||||||||.|||||||||.|||||||||||.||||||||..||.|||||||||||||||| 4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV gi|224058 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.|||| 4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV gi|224058 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV 120 ||||||||||||||||||||.||||||||||||||||||||||||||||||||||||||| 4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV gi|224058 180 EPYNATLSVHQLVENADECMV 201 180 ||||||||||.|||||||||| 201 4 180 EPYNATLSVHXLVENADECMV 201 """, ) record = next(records) self.assertEqual(record.num, 5) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "5") self.assertEqual( record.query.description, "gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=623)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 8994603) self.assertEqual(record.stat["db-len"], -1216159329) self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 147032237429) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 10) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|110740644|dbj|BAE98425.1|") self.assertEqual(hit.target.name, "BAE98425") self.assertEqual( hit.target.description, "hypothetical protein [Arabidopsis thaliana]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=80)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 231.0) self.assertAlmostEqual(hsp.annotations["bit score"], 93.5893343169526) self.assertAlmostEqual( hsp.annotations["evalue"], 5.57283114448317e-19, places=33 ) self.assertEqual(hsp.annotations["identity"], 42) self.assertEqual(hsp.annotations["positive"], 45) self.assertEqual(hsp.annotations["gaps"], 1) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 53.0) self.assertAlmostEqual(hsp.annotations["bit score"], 25.0238098036637) self.assertAlmostEqual( hsp.annotations["evalue"], 5.57283114448317e-19, places=33 ) self.assertEqual(hsp.annotations["identity"], 13) self.assertEqual(hsp.annotations["positive"], 13) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[53, 70], [ 0, 17]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 17)) self.assertEqual(repr(hsp.query.seq), "Seq('RKLVSRALRCAVGLNKS')") self.assertEqual(hsp.query.id, "5") self.assertEqual( hsp.query.description, "gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(17))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "5:455..505") self.assertEqual( repr(hsp.target.seq), "Seq({53: 'RKLVSRVLPHAVGLNPS'}, length=80)" ) self.assertEqual(hsp.target.id, "gi|110740644|dbj|BAE98425.1|") self.assertEqual(hsp.target.name, "BAE98425") self.assertEqual( hsp.target.description, "hypothetical protein [Arabidopsis thaliana]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "RKLVSR L AVGLN S") self.assertEqual( str(hsp), """\ Query : 5 Length: 17 Strand: Plus gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence Target: gi|110740644|dbj|BAE98425.1| Length: 80 Strand: Plus hypothetical protein [Arabidopsis thaliana] Score:25 bits(53), Expect:6e-19, Identities:13/17(76%), Positives:13/17(76%), Gaps:0.17(0%) gi|110740 53 RKLVSRVLPHAVGLNPS 70 0 ||||||.|..|||||.| 17 5 0 RKLVSRALRCAVGLNKS 17 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|226453533|gb|EEH50844.1|") self.assertEqual(hit.target.name, "EEH50844") self.assertEqual( hit.target.description, "predicted protein [Micromonas pusilla CCMP1545]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=81)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 238.0) self.assertAlmostEqual(hsp.annotations["bit score"], 96.2857313483741) self.assertAlmostEqual( hsp.annotations["evalue"], 1.69151855577931e-18, places=32 ) self.assertEqual(hsp.annotations["identity"], 42) self.assertEqual(hsp.annotations["positive"], 45) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 49], [ 0, 49]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 49)) self.assertEqual( repr(hsp.query.seq), "Seq('MKNVAKCDTWCELQNPVNHRVFERKLRPKPLGRGHVCLGVSHRVAPNPF')", ) self.assertEqual(hsp.query.id, "5") self.assertEqual( hsp.query.description, "gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(49))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "5:283..429") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MKNVAKCDTWCELQNPVNHRVFERKLRPKPSGRGHVCLGVTNRRPPSSF'}, length=81)", ) self.assertEqual(hsp.target.id, "gi|226453533|gb|EEH50844.1|") self.assertEqual(hsp.target.name, "EEH50844") self.assertEqual( hsp.target.description, "predicted protein [Micromonas pusilla CCMP1545]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKNVAKCDTWCELQNPVNHRVFERKLRPKP GRGHVCLGV++R P+ F", ) self.assertEqual( str(hsp), """\ Query : 5 Length: 49 Strand: Plus gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence Target: gi|226453533|gb|EEH50844.1| Length: 81 Strand: Plus predicted protein [Micromonas pusilla CCMP1545] Score:96 bits(238), Expect:2e-18, Identities:42/49(86%), Positives:45/49(92%), Gaps:0.49(0%) gi|226453 0 MKNVAKCDTWCELQNPVNHRVFERKLRPKPSGRGHVCLGVTNRRPPSSF 49 0 ||||||||||||||||||||||||||||||.|||||||||..|..|..| 49 5 0 MKNVAKCDTWCELQNPVNHRVFERKLRPKPLGRGHVCLGVSHRVAPNPF 49 """, ) hit = record[2] self.assertEqual(hit.num, 3) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|168069582|ref|XP_001786502.1|") self.assertEqual(hit.target.name, "XP_001786502") self.assertEqual( hit.target.description, "predicted protein [Physcomitrella patens subsp. patens] >gi|162661153|gb|EDQ48685.1| predicted protein [Physcomitrella patens subsp. patens]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=88)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 183.0) self.assertAlmostEqual(hsp.annotations["bit score"], 75.0997546729196) self.assertAlmostEqual( hsp.annotations["evalue"], 4.03544314604194e-12, places=26 ) self.assertEqual(hsp.annotations["identity"], 37) self.assertEqual(hsp.annotations["positive"], 39) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 2, 44], [ 0, 42]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 42)) self.assertEqual( repr(hsp.query.seq), "Seq('ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV')" ) self.assertEqual(hsp.query.id, "5") self.assertEqual( hsp.query.description, "gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(42))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "complement(5:245..370)") self.assertEqual( repr(hsp.target.seq), "Seq({2: 'ASGATCVQKLDDSRDSAIHTTYRISLRSSSLQEPRYPLLRVV'}, length=88)", ) self.assertEqual(hsp.target.id, "gi|168069582|ref|XP_001786502.1|") self.assertEqual(hsp.target.name, "XP_001786502") self.assertEqual( hsp.target.description, "predicted protein [Physcomitrella patens subsp. patens] >gi|162661153|gb|EDQ48685.1| predicted protein [Physcomitrella patens subsp. patens]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "ASGATCVQKLD SRDSAIHT YRISLRSSS++EPRYPL RVV" ) self.assertEqual( str(hsp), """\ Query : 5 Length: 42 Strand: Plus gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence Target: gi|168069582|ref|XP_001786502.1| Length: 88 Strand: Plus predicted protein [Physcomitrella patens subsp. patens] >gi|162661153|gb|EDQ48685.1| predicted protein [Physcomitrella patens subsp. patens] Score:75 bits(183), Expect:4e-12, Identities:37/42(88%), Positives:39/42(93%), Gaps:0.42(0%) gi|168069 2 ASGATCVQKLDDSRDSAIHTTYRISLRSSSLQEPRYPLLRVV 44 0 |||||||||||.||||||||.|||||||||..||||||.||| 42 5 0 ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV 42 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|168068558|ref|XP_001786120.1|") self.assertEqual(hit.target.name, "XP_001786120") self.assertEqual( hit.target.description, "predicted protein [Physcomitrella patens subsp. patens] >gi|162662102|gb|EDQ49068.1| predicted protein [Physcomitrella patens subsp. patens]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=130)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 178.0) self.assertAlmostEqual(hsp.annotations["bit score"], 73.1737567933329) self.assertAlmostEqual( hsp.annotations["evalue"], 1.53346675969648e-11, places=25 ) self.assertEqual(hsp.annotations["identity"], 36) self.assertEqual(hsp.annotations["positive"], 39) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 2, 44], [ 0, 42]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 42)) self.assertEqual( repr(hsp.query.seq), "Seq('ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV')" ) self.assertEqual(hsp.query.id, "5") self.assertEqual( hsp.query.description, "gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(42))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "complement(5:245..370)") self.assertEqual( repr(hsp.target.seq), "Seq({2: 'ASGATCVQKLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV'}, length=130)", ) self.assertEqual(hsp.target.id, "gi|168068558|ref|XP_001786120.1|") self.assertEqual(hsp.target.name, "XP_001786120") self.assertEqual( hsp.target.description, "predicted protein [Physcomitrella patens subsp. patens] >gi|162662102|gb|EDQ49068.1| predicted protein [Physcomitrella patens subsp. patens]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "ASGATCVQKLD SR+SAIHT YRISLRSSS++EPRYPL RVV" ) self.assertEqual( str(hsp), """\ Query : 5 Length: 42 Strand: Plus gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence Target: gi|168068558|ref|XP_001786120.1| Length: 130 Strand: Plus predicted protein [Physcomitrella patens subsp. patens] >gi|162662102|gb|EDQ49068.1| predicted protein [Physcomitrella patens subsp. patens] Score:73 bits(178), Expect:2e-11, Identities:36/42(86%), Positives:39/42(93%), Gaps:0.42(0%) gi|168068 2 ASGATCVQKLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV 44 0 |||||||||||.||.|||||.|||||||||..||||||.||| 42 5 0 ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV 42 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|168068926|ref|XP_001786259.1|") self.assertEqual(hit.target.name, "XP_001786259") self.assertEqual( hit.target.description, "predicted protein [Physcomitrella patens subsp. patens] >gi|168069965|ref|XP_001786641.1| predicted protein [Physcomitrella patens subsp. patens] >gi|162660807|gb|EDQ48545.1| predicted protein [Physcomitrella patens subsp. patens] >gi|162661808|gb|EDQ48929.1| predicted protein [Physcomitrella patens subsp. patens]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=148)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 178.0) self.assertAlmostEqual(hsp.annotations["bit score"], 73.1737567933329) self.assertAlmostEqual( hsp.annotations["evalue"], 1.53346675969648e-11, places=25 ) self.assertEqual(hsp.annotations["identity"], 36) self.assertEqual(hsp.annotations["positive"], 39) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 2, 44], [ 0, 42]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 42)) self.assertEqual( repr(hsp.query.seq), "Seq('ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV')" ) self.assertEqual(hsp.query.id, "5") self.assertEqual( hsp.query.description, "gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(42))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "complement(5:245..370)") self.assertEqual( repr(hsp.target.seq), "Seq({2: 'ASGATCVQKLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV'}, length=148)", ) self.assertEqual(hsp.target.id, "gi|168068926|ref|XP_001786259.1|") self.assertEqual(hsp.target.name, "XP_001786259") self.assertEqual( hsp.target.description, "predicted protein [Physcomitrella patens subsp. patens] >gi|168069965|ref|XP_001786641.1| predicted protein [Physcomitrella patens subsp. patens] >gi|162660807|gb|EDQ48545.1| predicted protein [Physcomitrella patens subsp. patens] >gi|162661808|gb|EDQ48929.1| predicted protein [Physcomitrella patens subsp. patens]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "ASGATCVQKLD SR+SAIHT YRISLRSSS++EPRYPL RVV" ) self.assertEqual( str(hsp), """\ Query : 5 Length: 42 Strand: Plus gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence Target: gi|168068926|ref|XP_001786259.1| Length: 148 Strand: Plus predicted protein [Physcomitrella patens subsp. patens] >gi|168069965|ref|XP_001786641.1| predicted protein [Physcomitrella patens subsp. patens] >gi|162660807|gb|EDQ48545.1| predicted protein [Physcomitrella patens subsp. patens] >gi|162661808|gb|EDQ48929.1| predicted protein [Physcomitrella patens subsp. patens] Score:73 bits(178), Expect:2e-11, Identities:36/42(86%), Positives:39/42(93%), Gaps:0.42(0%) gi|168068 2 ASGATCVQKLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV 44 0 |||||||||||.||.|||||.|||||||||..||||||.||| 42 5 0 ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV 42 """, ) hit = record[5] self.assertEqual(hit.num, 6) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|168070288|ref|XP_001786759.1|") self.assertEqual(hit.target.name, "XP_001786759") self.assertEqual( hit.target.description, "predicted protein [Physcomitrella patens subsp. patens] >gi|162660550|gb|EDQ48427.1| predicted protein [Physcomitrella patens subsp. patens]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=148)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 178.0) self.assertAlmostEqual(hsp.annotations["bit score"], 73.1737567933329) self.assertAlmostEqual( hsp.annotations["evalue"], 1.53346675969648e-11, places=25 ) self.assertEqual(hsp.annotations["identity"], 36) self.assertEqual(hsp.annotations["positive"], 39) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 2, 44], [ 0, 42]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 42)) self.assertEqual( repr(hsp.query.seq), "Seq('ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV')" ) self.assertEqual(hsp.query.id, "5") self.assertEqual( hsp.query.description, "gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(42))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "complement(5:245..370)") self.assertEqual( repr(hsp.target.seq), "Seq({2: 'ASGATCVQKLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV'}, length=148)", ) self.assertEqual(hsp.target.id, "gi|168070288|ref|XP_001786759.1|") self.assertEqual(hsp.target.name, "XP_001786759") self.assertEqual( hsp.target.description, "predicted protein [Physcomitrella patens subsp. patens] >gi|162660550|gb|EDQ48427.1| predicted protein [Physcomitrella patens subsp. patens]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "ASGATCVQKLD SR+SAIHT YRISLRSSS++EPRYPL RVV" ) self.assertEqual( str(hsp), """\ Query : 5 Length: 42 Strand: Plus gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence Target: gi|168070288|ref|XP_001786759.1| Length: 148 Strand: Plus predicted protein [Physcomitrella patens subsp. patens] >gi|162660550|gb|EDQ48427.1| predicted protein [Physcomitrella patens subsp. patens] Score:73 bits(178), Expect:2e-11, Identities:36/42(86%), Positives:39/42(93%), Gaps:0.42(0%) gi|168070 2 ASGATCVQKLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV 44 0 |||||||||||.||.|||||.|||||||||..||||||.||| 42 5 0 ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV 42 """, ) hit = record[6] self.assertEqual(hit.num, 7) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|168068591|ref|XP_001786133.1|") self.assertEqual(hit.target.name, "XP_001786133") self.assertEqual( hit.target.description, "predicted protein [Physcomitrella patens subsp. patens] >gi|162662081|gb|EDQ49057.1| predicted protein [Physcomitrella patens subsp. patens]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=220)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 172.0) self.assertAlmostEqual(hsp.annotations["bit score"], 70.8625593378288) self.assertAlmostEqual( hsp.annotations["evalue"], 7.61051640442713e-11, places=25 ) self.assertEqual(hsp.annotations["identity"], 42) self.assertEqual(hsp.annotations["positive"], 50) self.assertEqual(hsp.annotations["gaps"], 8) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[142, 169, 172, 183, 183, 220], [ 0, 27, 27, 38, 43, 80]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 83)) self.assertEqual( repr(hsp.query.seq), "Seq('RPTAHRSARETNFRSQTVESRRKWVGGDAM*DAQADVPSA*WLRAQLAFKNSMV...IRC')", ) self.assertEqual(hsp.query.id, "5") self.assertEqual( hsp.query.description, "gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(80))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "complement(5:256..495)") self.assertEqual( repr(hsp.target.seq), "Seq({142: 'RGLCHHADSDGQFHSTLPIKDIKRIGGCRDDALAGMPSDEPRAQLAFKNSMIHG...IRC'}, length=220)", ) self.assertEqual(hsp.target.id, "gi|168068591|ref|XP_001786133.1|") self.assertEqual(hsp.target.name, "XP_001786133") self.assertEqual( hsp.target.description, "predicted protein [Physcomitrella patens subsp. patens] >gi|162662081|gb|EDQ49057.1| predicted protein [Physcomitrella patens subsp. patens]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "R H + + F S K +GG DA+ +D P RAQLAFKNSM+HGILQFT IAFR VLHRC+S+DIRC", ) self.assertEqual( str(hsp), """\ Query : 5 Length: 80 Strand: Plus gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence Target: gi|168068591|ref|XP_001786133.1| Length: 220 Strand: Plus predicted protein [Physcomitrella patens subsp. patens] >gi|162662081|gb|EDQ49057.1| predicted protein [Physcomitrella patens subsp. patens] Score:70 bits(172), Expect:8e-11, Identities:42/83(51%), Positives:50/83(60%), Gaps:8.83(10%) gi|168068 142 RGLCHHADSDGQFHSTLPIKDIKRIGGCRDDALAGMPSDEP-----RAQLAFKNSMIHGI 0 |...|.......|.|.......|..||---||......|.|-----||||||||||.||| 5 0 RPTAHRSARETNFRSQTVESRRKWVGG---DAM*DAQADVPSA*WLRAQLAFKNSMVHGI gi|168068 197 LQFTLRIAFRCVLHRCKSQDIRC 220 60 ||||..||||.|||||.|.|||| 83 5 57 LQFTPSIAFRYVLHRCESRDIRC 80 """, ) hit = record[7] self.assertEqual(hit.num, 8) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|74622391|sp|Q8TGM5|ART3_YEAST") self.assertEqual(hit.target.name, "Q8TGM5") self.assertEqual( hit.target.description, "Uncharacterized protein ART3 (Antisense to ribosomal RNA transcript protein 3) >gi|18767126|gb|AAL79278.1| unknown [Saccharomyces cerevisiae]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=67)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 141.0) self.assertAlmostEqual(hsp.annotations["bit score"], 58.9213724843908) self.assertAlmostEqual( hsp.annotations["evalue"], 2.99274389212967e-07, places=21 ) self.assertEqual(hsp.annotations["identity"], 29) self.assertEqual(hsp.annotations["positive"], 32) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 7, 46], [ 0, 39]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 39)) self.assertEqual( repr(hsp.query.seq), "Seq('GATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRV')" ) self.assertEqual(hsp.query.id, "5") self.assertEqual( hsp.query.description, "gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(39))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "complement(5:248..364)") self.assertEqual( repr(hsp.target.seq), "Seq({7: 'GAMCVQRFDDSRNSAIHITYRISLRSSSMREPRDPLLKV'}, length=67)", ) self.assertEqual(hsp.target.id, "gi|74622391|sp|Q8TGM5|ART3_YEAST") self.assertEqual(hsp.target.name, "Q8TGM5") self.assertEqual( hsp.target.description, "Uncharacterized protein ART3 (Antisense to ribosomal RNA transcript protein 3) >gi|18767126|gb|AAL79278.1| unknown [Saccharomyces cerevisiae]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "GA CVQ+ D SR+SAIH YRISLRSSSMREPR PL +V" ) self.assertEqual( str(hsp), """\ Query : 5 Length: 39 Strand: Plus gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence Target: gi|74622391|sp|Q8TGM5|ART3_YEAST Length: 67 Strand: Plus Uncharacterized protein ART3 (Antisense to ribosomal RNA transcript protein 3) >gi|18767126|gb|AAL79278.1| unknown [Saccharomyces cerevisiae] Score:58 bits(141), Expect:3e-07, Identities:29/39(74%), Positives:32/39(82%), Gaps:0.39(0%) gi|746223 7 GAMCVQRFDDSRNSAIHITYRISLRSSSMREPRDPLLKV 46 0 ||.|||..|.||.||||..||||||||||||||.||..| 39 5 0 GATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRV 39 """, ) hit = record[8] self.assertEqual(hit.num, 9) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|168069944|ref|XP_001786634.1|") self.assertEqual(hit.target.name, "XP_001786634") self.assertEqual( hit.target.description, "predicted protein [Physcomitrella patens subsp. patens] >gi|162660825|gb|EDQ48552.1| predicted protein [Physcomitrella patens subsp. patens]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=138)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 137.0) self.assertAlmostEqual(hsp.annotations["bit score"], 57.3805741807214) self.assertAlmostEqual( hsp.annotations["evalue"], 8.70755166175354e-07, places=21 ) self.assertEqual(hsp.annotations["identity"], 28) self.assertEqual(hsp.annotations["positive"], 31) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 34], [ 0, 34]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 34)) self.assertEqual( repr(hsp.query.seq), "Seq('KLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV')" ) self.assertEqual(hsp.query.id, "5") self.assertEqual( hsp.query.description, "gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(34))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "complement(5:245..346)") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'KLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV'}, length=138)", ) self.assertEqual(hsp.target.id, "gi|168069944|ref|XP_001786634.1|") self.assertEqual(hsp.target.name, "XP_001786634") self.assertEqual( hsp.target.description, "predicted protein [Physcomitrella patens subsp. patens] >gi|162660825|gb|EDQ48552.1| predicted protein [Physcomitrella patens subsp. patens]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "KLD SR+SAIHT YRISLRSSS++EPRYPL RVV" ) self.assertEqual( str(hsp), """\ Query : 5 Length: 34 Strand: Plus gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence Target: gi|168069944|ref|XP_001786634.1| Length: 138 Strand: Plus predicted protein [Physcomitrella patens subsp. patens] >gi|162660825|gb|EDQ48552.1| predicted protein [Physcomitrella patens subsp. patens] Score:57 bits(137), Expect:9e-07, Identities:28/34(82%), Positives:31/34(91%), Gaps:0.34(0%) gi|168069 0 KLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV 34 0 |||.||.|||||.|||||||||..||||||.||| 34 5 0 KLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV 34 """, ) hit = record[9] self.assertEqual(hit.num, 10) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|50307717|ref|XP_453851.1|") self.assertEqual(hit.target.name, "XP_453851") self.assertEqual( hit.target.description, "unnamed protein product [Kluyveromyces lactis]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=54)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 134.0) self.assertAlmostEqual(hsp.annotations["bit score"], 56.2249754529693) self.assertAlmostEqual( hsp.annotations["evalue"], 1.93984013155423e-06, places=20 ) self.assertEqual(hsp.annotations["identity"], 28) self.assertEqual(hsp.annotations["positive"], 31) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 7, 47], [ 0, 40]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 40)) self.assertEqual( repr(hsp.query.seq), "Seq('GATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV')" ) self.assertEqual(hsp.query.id, "5") self.assertEqual( hsp.query.description, "gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(40))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "complement(5:245..364)") self.assertEqual( repr(hsp.target.seq), "Seq({7: 'GAMCVQRFDDSRKSAIHNTYRNSLRSSSMREPRDPLLKVL'}, length=54)", ) self.assertEqual(hsp.target.id, "gi|50307717|ref|XP_453851.1|") self.assertEqual(hsp.target.name, "XP_453851") self.assertEqual( hsp.target.description, "unnamed protein product [Kluyveromyces lactis]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "GA CVQ+ D SR SAIH YR SLRSSSMREPR PL +V+" ) self.assertEqual( str(hsp), """\ Query : 5 Length: 40 Strand: Plus gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence Target: gi|50307717|ref|XP_453851.1| Length: 54 Strand: Plus unnamed protein product [Kluyveromyces lactis] Score:56 bits(134), Expect:2e-06, Identities:28/40(70%), Positives:31/40(78%), Gaps:0.40(0%) gi|503077 7 GAMCVQRFDDSRKSAIHNTYRNSLRSSSMREPRDPLLKVL 47 0 ||.|||..|.||.||||..||.|||||||||||.||..|. 40 5 0 GATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV 40 """, ) record = next(records) self.assertEqual(record.num, 6) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "6") self.assertEqual( record.query.description, "gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=309)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 8994603) self.assertEqual(record.stat["db-len"], -1216159329) self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 75367093081) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 10) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|3176603|gb|AAC18749.1|") self.assertEqual(hit.target.name, "AAC18749") self.assertEqual(hit.target.description, "phytochrome A [Lathyrus odoratus]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=103)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 543.0) self.assertAlmostEqual(hsp.annotations["bit score"], 213.771602003167) self.assertAlmostEqual( hsp.annotations["evalue"], 3.7262743863676e-54, places=67 ) self.assertEqual(hsp.annotations["identity"], 103) self.assertEqual(hsp.annotations["positive"], 103) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 103], [ 0, 103]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 103)) self.assertEqual( repr(hsp.query.seq), "Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.query.id, "6") self.assertEqual( hsp.query.description, "gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(103))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "6:1..309") self.assertEqual( repr(hsp.target.seq), "Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.target.id, "gi|3176603|gb|AAC18749.1|") self.assertEqual(hsp.target.name, "AAC18749") self.assertEqual(hsp.target.description, "phytochrome A [Lathyrus odoratus]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV", ) self.assertEqual( str(hsp), """\ Query : 6 Length: 103 Strand: Plus gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds Target: gi|3176603|gb|AAC18749.1| Length: 103 Strand: Plus phytochrome A [Lathyrus odoratus] Score:213 bits(543), Expect:4e-54, Identities:103/103(100%), Positives:103/103(100%), Gaps:0.103(0%) gi|317660 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA gi|317660 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103 60 ||||||||||||||||||||||||||||||||||||||||||| 103 6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|130188|sp|P15001.1|PHYA_PEA") self.assertEqual(hit.target.name, "P15001") self.assertEqual( hit.target.description, "RecName: Full=Phytochrome A >gi|169132|gb|AAA33682.1| phytochrome [Pisum sativum] >gi|295830|emb|CAA32242.1| phytochrome apoprotein [Pisum sativum] >gi|51173514|gb|AAT97643.1| phytochrome A apoprotein [Pisum sativum] >gi|226757|prf||1604466A phytochrome", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1124)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 530.0) self.assertAlmostEqual(hsp.annotations["bit score"], 208.764007516241) self.assertAlmostEqual( hsp.annotations["evalue"], 1.1987013044853e-52, places=65 ) self.assertEqual(hsp.annotations["identity"], 101) self.assertEqual(hsp.annotations["positive"], 102) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[275, 378], [ 0, 103]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 103)) self.assertEqual( repr(hsp.query.seq), "Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.query.id, "6") self.assertEqual( hsp.query.description, "gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(103))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "6:1..309") self.assertEqual( repr(hsp.target.seq), "Seq({275: 'QAARFLFMKNKVRMIVDCNAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=1124)", ) self.assertEqual(hsp.target.id, "gi|130188|sp|P15001.1|PHYA_PEA") self.assertEqual(hsp.target.name, "P15001") self.assertEqual( hsp.target.description, "RecName: Full=Phytochrome A >gi|169132|gb|AAA33682.1| phytochrome [Pisum sativum] >gi|295830|emb|CAA32242.1| phytochrome apoprotein [Pisum sativum] >gi|51173514|gb|AAT97643.1| phytochrome A apoprotein [Pisum sativum] >gi|226757|prf||1604466A phytochrome", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "QAARFLFMKNKVRMIVDC+AKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDGDS DAVLPQKKKRLWGLVVCHNTTPRFV", ) self.assertEqual( str(hsp), """\ Query : 6 Length: 103 Strand: Plus gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds Target: gi|130188|sp|P15001.1|PHYA_PEA Length: 1124 Strand: Plus RecName: Full=Phytochrome A >gi|169132|gb|AAA33682.1| phytochrome [Pisum sativum] >gi|295830|emb|CAA32242.1| phytochrome apoprotein [Pisum sativum] >gi|51173514|gb|AAT97643.1| phytochrome A apoprotein [Pisum sativum] >gi|226757|prf||1604466A phytochrome Score:208 bits(530), Expect:1e-52, Identities:101/103(98%), Positives:102/103(99%), Gaps:0.103(0%) gi|130188 275 QAARFLFMKNKVRMIVDCNAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA 0 ||||||||||||||||||.||||||||||||||||||||||||||||||||||||||||| 6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA gi|130188 335 SLVMAVVVNDSDEDGDSADAVLPQKKKRLWGLVVCHNTTPRFV 378 60 |||||||||||||||||.||||||||||||||||||||||||| 103 6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103 """, ) hit = record[2] self.assertEqual(hit.num, 3) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|2499555|sp|P93673.1|PHYA_LATSA") self.assertEqual(hit.target.name, "P93673") self.assertEqual( hit.target.description, "RecName: Full=Phytochrome type A >gi|1848273|gb|AAB47994.1| phytochrome type A [Lathyrus sativus]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1124)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 530.0) self.assertAlmostEqual(hsp.annotations["bit score"], 208.764007516241) self.assertAlmostEqual( hsp.annotations["evalue"], 1.1987013044853e-52, places=65 ) self.assertEqual(hsp.annotations["identity"], 101) self.assertEqual(hsp.annotations["positive"], 102) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[275, 378], [ 0, 103]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 103)) self.assertEqual( repr(hsp.query.seq), "Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.query.id, "6") self.assertEqual( hsp.query.description, "gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(103))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "6:1..309") self.assertEqual( repr(hsp.target.seq), "Seq({275: 'QAARFLFMKNKVRMIVDCNAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=1124)", ) self.assertEqual(hsp.target.id, "gi|2499555|sp|P93673.1|PHYA_LATSA") self.assertEqual(hsp.target.name, "P93673") self.assertEqual( hsp.target.description, "RecName: Full=Phytochrome type A >gi|1848273|gb|AAB47994.1| phytochrome type A [Lathyrus sativus]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "QAARFLFMKNKVRMIVDC+AKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDGDS DAVLPQKKKRLWGLVVCHNTTPRFV", ) self.assertEqual( str(hsp), """\ Query : 6 Length: 103 Strand: Plus gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds Target: gi|2499555|sp|P93673.1|PHYA_LATSA Length: 1124 Strand: Plus RecName: Full=Phytochrome type A >gi|1848273|gb|AAB47994.1| phytochrome type A [Lathyrus sativus] Score:208 bits(530), Expect:1e-52, Identities:101/103(98%), Positives:102/103(99%), Gaps:0.103(0%) gi|249955 275 QAARFLFMKNKVRMIVDCNAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA 0 ||||||||||||||||||.||||||||||||||||||||||||||||||||||||||||| 6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA gi|249955 335 SLVMAVVVNDSDEDGDSADAVLPQKKKRLWGLVVCHNTTPRFV 378 60 |||||||||||||||||.||||||||||||||||||||||||| 103 6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|3176595|gb|AAC18745.1|") self.assertEqual(hit.target.name, "AAC18745") self.assertEqual( hit.target.description, "phytochrome A [Lennea melanocarpa] >gi|3176597|gb|AAC18746.1| phytochrome A [Hebestigma cubense] >gi|3176609|gb|AAC18752.1| phytochrome A [Sesbania cochichinensis] >gi|3176611|gb|AAC18753.1| phytochrome A [Sesbania emerus]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=103)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 528.0) self.assertAlmostEqual(hsp.annotations["bit score"], 207.993608364407) self.assertAlmostEqual( hsp.annotations["evalue"], 2.04467473791515e-52, places=66 ) self.assertEqual(hsp.annotations["identity"], 100) self.assertEqual(hsp.annotations["positive"], 101) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 103], [ 0, 103]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 103)) self.assertEqual( repr(hsp.query.seq), "Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.query.id, "6") self.assertEqual( hsp.query.description, "gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(103))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "6:1..309") self.assertEqual( repr(hsp.target.seq), "Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.target.id, "gi|3176595|gb|AAC18745.1|") self.assertEqual(hsp.target.name, "AAC18745") self.assertEqual( hsp.target.description, "phytochrome A [Lennea melanocarpa] >gi|3176597|gb|AAC18746.1| phytochrome A [Hebestigma cubense] >gi|3176609|gb|AAC18752.1| phytochrome A [Sesbania cochichinensis] >gi|3176611|gb|AAC18753.1| phytochrome A [Sesbania emerus]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDGDS DAV PQK+KRLWGLVVCHNTTPRFV", ) self.assertEqual( str(hsp), """\ Query : 6 Length: 103 Strand: Plus gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds Target: gi|3176595|gb|AAC18745.1| Length: 103 Strand: Plus phytochrome A [Lennea melanocarpa] >gi|3176597|gb|AAC18746.1| phytochrome A [Hebestigma cubense] >gi|3176609|gb|AAC18752.1| phytochrome A [Sesbania cochichinensis] >gi|3176611|gb|AAC18753.1| phytochrome A [Sesbania emerus] Score:207 bits(528), Expect:2e-52, Identities:100/103(97%), Positives:101/103(98%), Gaps:0.103(0%) gi|317659 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA gi|317659 60 SLVMAVVVNDSDEDGDSSDAVQPQKRKRLWGLVVCHNTTPRFV 103 60 |||||||||||||||||.|||.|||.||||||||||||||||| 103 6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|1711106|gb|AAC18675.1|") self.assertEqual(hit.target.name, "AAC18675") self.assertEqual(hit.target.description, "phytochrome A [Sophora affinis]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=210)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 528.0) self.assertAlmostEqual(hsp.annotations["bit score"], 207.993608364407) self.assertAlmostEqual( hsp.annotations["evalue"], 2.04467473791515e-52, places=66 ) self.assertEqual(hsp.annotations["identity"], 100) self.assertEqual(hsp.annotations["positive"], 101) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 40, 143], [ 0, 103]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 103)) self.assertEqual( repr(hsp.query.seq), "Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.query.id, "6") self.assertEqual( hsp.query.description, "gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(103))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "6:1..309") self.assertEqual( repr(hsp.target.seq), "Seq({40: 'QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=210)", ) self.assertEqual(hsp.target.id, "gi|1711106|gb|AAC18675.1|") self.assertEqual(hsp.target.name, "AAC18675") self.assertEqual(hsp.target.description, "phytochrome A [Sophora affinis]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDGDS DAV PQK+KRLWGLVVCHNTTPRFV", ) self.assertEqual( str(hsp), """\ Query : 6 Length: 103 Strand: Plus gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds Target: gi|1711106|gb|AAC18675.1| Length: 210 Strand: Plus phytochrome A [Sophora affinis] Score:207 bits(528), Expect:2e-52, Identities:100/103(97%), Positives:101/103(98%), Gaps:0.103(0%) gi|171110 40 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA gi|171110 100 SLVMAVVVNDSDEDGDSSDAVQPQKRKRLWGLVVCHNTTPRFV 143 60 |||||||||||||||||.|||.|||.||||||||||||||||| 103 6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103 """, ) hit = record[5] self.assertEqual(hit.num, 6) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|1711090|gb|AAC18670.1|") self.assertEqual(hit.target.name, "AAC18670") self.assertEqual(hit.target.description, "phytochrome A [Myrospermum sousanum]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=210)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 525.0) self.assertAlmostEqual(hsp.annotations["bit score"], 206.838009636654) self.assertAlmostEqual( hsp.annotations["evalue"], 4.55506009801166e-52, places=66 ) self.assertEqual(hsp.annotations["identity"], 99) self.assertEqual(hsp.annotations["positive"], 101) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 40, 143], [ 0, 103]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 103)) self.assertEqual( repr(hsp.query.seq), "Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.query.id, "6") self.assertEqual( hsp.query.description, "gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(103))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "6:1..309") self.assertEqual( repr(hsp.target.seq), "Seq({40: 'QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=210)", ) self.assertEqual(hsp.target.id, "gi|1711090|gb|AAC18670.1|") self.assertEqual(hsp.target.name, "AAC18670") self.assertEqual(hsp.target.description, "phytochrome A [Myrospermum sousanum]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLV+AVVVNDSDEDGDS DAV PQK+KRLWGLVVCHNTTPRFV", ) self.assertEqual( str(hsp), """\ Query : 6 Length: 103 Strand: Plus gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds Target: gi|1711090|gb|AAC18670.1| Length: 210 Strand: Plus phytochrome A [Myrospermum sousanum] Score:206 bits(525), Expect:5e-52, Identities:99/103(96%), Positives:101/103(98%), Gaps:0.103(0%) gi|171109 40 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA gi|171109 100 SLVLAVVVNDSDEDGDSSDAVQPQKRKRLWGLVVCHNTTPRFV 143 60 |||.|||||||||||||.|||.|||.||||||||||||||||| 103 6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103 """, ) hit = record[6] self.assertEqual(hit.num, 7) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|3176605|gb|AAC18750.1|") self.assertEqual(hit.target.name, "AAC18750") self.assertEqual(hit.target.description, "phytochrome A [Hybosema robustum]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=103)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 524.0) self.assertAlmostEqual(hsp.annotations["bit score"], 206.452810060737) self.assertAlmostEqual( hsp.annotations["evalue"], 5.94909272347008e-52, places=66 ) self.assertEqual(hsp.annotations["identity"], 99) self.assertEqual(hsp.annotations["positive"], 100) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 103], [ 0, 103]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 103)) self.assertEqual( repr(hsp.query.seq), "Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.query.id, "6") self.assertEqual( hsp.query.description, "gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(103))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "6:1..309") self.assertEqual( repr(hsp.target.seq), "Seq('QATRFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.target.id, "gi|3176605|gb|AAC18750.1|") self.assertEqual(hsp.target.name, "AAC18750") self.assertEqual(hsp.target.description, "phytochrome A [Hybosema robustum]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "QA RFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDGDS DAV PQK+KRLWGLVVCHNTTPRFV", ) self.assertEqual( str(hsp), """\ Query : 6 Length: 103 Strand: Plus gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds Target: gi|3176605|gb|AAC18750.1| Length: 103 Strand: Plus phytochrome A [Hybosema robustum] Score:206 bits(524), Expect:6e-52, Identities:99/103(96%), Positives:100/103(97%), Gaps:0.103(0%) gi|317660 0 QATRFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA 0 ||.||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA gi|317660 60 SLVMAVVVNDSDEDGDSSDAVQPQKRKRLWGLVVCHNTTPRFV 103 60 |||||||||||||||||.|||.|||.||||||||||||||||| 103 6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103 """, ) hit = record[7] self.assertEqual(hit.num, 8) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|3176454|gb|AAC18668.1|") self.assertEqual(hit.target.name, "AAC18668") self.assertEqual(hit.target.description, "phytochrome A [Cyclolobium nutans]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=207)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 523.0) self.assertAlmostEqual(hsp.annotations["bit score"], 206.06761048482) self.assertAlmostEqual( hsp.annotations["evalue"], 7.76975571582328e-52, places=66 ) self.assertEqual(hsp.annotations["identity"], 99) self.assertEqual(hsp.annotations["positive"], 101) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 37, 140], [ 0, 103]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 103)) self.assertEqual( repr(hsp.query.seq), "Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.query.id, "6") self.assertEqual( hsp.query.description, "gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(103))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "6:1..309") self.assertEqual( repr(hsp.target.seq), "Seq({37: 'QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=207)", ) self.assertEqual(hsp.target.id, "gi|3176454|gb|AAC18668.1|") self.assertEqual(hsp.target.name, "AAC18668") self.assertEqual(hsp.target.description, "phytochrome A [Cyclolobium nutans]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDG+S DAV PQK+KRLWGLVVCHNTTPRFV", ) self.assertEqual( str(hsp), """\ Query : 6 Length: 103 Strand: Plus gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds Target: gi|3176454|gb|AAC18668.1| Length: 207 Strand: Plus phytochrome A [Cyclolobium nutans] Score:206 bits(523), Expect:8e-52, Identities:99/103(96%), Positives:101/103(98%), Gaps:0.103(0%) gi|317645 37 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA gi|317645 97 SLVMAVVVNDSDEDGNSSDAVQPQKRKRLWGLVVCHNTTPRFV 140 60 |||||||||||||||.|.|||.|||.||||||||||||||||| 103 6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103 """, ) hit = record[8] self.assertEqual(hit.num, 9) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|3176523|gb|AAC18709.1|") self.assertEqual(hit.target.name, "AAC18709") self.assertEqual( hit.target.description, "phytochrome A [Millettia richardiana]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=139)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 521.0) self.assertAlmostEqual(hsp.annotations["bit score"], 205.297211332985) self.assertAlmostEqual( hsp.annotations["evalue"], 1.3253195915005e-51, places=64 ) self.assertEqual(hsp.annotations["identity"], 98) self.assertEqual(hsp.annotations["positive"], 101) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 36, 139], [ 0, 103]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 103)) self.assertEqual( repr(hsp.query.seq), "Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.query.id, "6") self.assertEqual( hsp.query.description, "gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(103))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "6:1..309") self.assertEqual( repr(hsp.target.seq), "Seq({36: 'QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=139)", ) self.assertEqual(hsp.target.id, "gi|3176523|gb|AAC18709.1|") self.assertEqual(hsp.target.name, "AAC18709") self.assertEqual( hsp.target.description, "phytochrome A [Millettia richardiana]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVND++EDGDS DAV PQK+KRLWGLVVCHNTTPRFV", ) self.assertEqual( str(hsp), """\ Query : 6 Length: 103 Strand: Plus gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds Target: gi|3176523|gb|AAC18709.1| Length: 139 Strand: Plus phytochrome A [Millettia richardiana] Score:205 bits(521), Expect:1e-51, Identities:98/103(95%), Positives:101/103(98%), Gaps:0.103(0%) gi|317652 36 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA gi|317652 96 SLVMAVVVNDNEEDGDSSDAVQPQKRKRLWGLVVCHNTTPRFV 139 60 ||||||||||..|||||.|||.|||.||||||||||||||||| 103 6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103 """, ) hit = record[9] self.assertEqual(hit.num, 10) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|3176494|gb|AAC18693.1|") self.assertEqual(hit.target.name, "AAC18693") self.assertEqual( hit.target.description, "phytochrome A [Callerya atropurpurea]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=177)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 520.0) self.assertAlmostEqual(hsp.annotations["bit score"], 204.912011757068) self.assertAlmostEqual( hsp.annotations["evalue"], 1.73092099081406e-51, places=65 ) self.assertEqual(hsp.annotations["identity"], 98) self.assertEqual(hsp.annotations["positive"], 101) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 7, 110], [ 0, 103]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 103)) self.assertEqual( repr(hsp.query.seq), "Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')", ) self.assertEqual(hsp.query.id, "6") self.assertEqual( hsp.query.description, "gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(103))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "6:1..309") self.assertEqual( repr(hsp.target.seq), "Seq({7: 'QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=177)", ) self.assertEqual(hsp.target.id, "gi|3176494|gb|AAC18693.1|") self.assertEqual(hsp.target.name, "AAC18693") self.assertEqual( hsp.target.description, "phytochrome A [Callerya atropurpurea]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDS+EDGDS +AV PQK+KRLWGLVVCHNTTPRFV", ) self.assertEqual( str(hsp), """\ Query : 6 Length: 103 Strand: Plus gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds Target: gi|3176494|gb|AAC18693.1| Length: 177 Strand: Plus phytochrome A [Callerya atropurpurea] Score:204 bits(520), Expect:2e-51, Identities:98/103(95%), Positives:101/103(98%), Gaps:0.103(0%) gi|317649 7 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA gi|317649 67 SLVMAVVVNDSEEDGDSSEAVQPQKRKRLWGLVVCHNTTPRFV 110 60 |||||||||||.|||||..||.|||.||||||||||||||||| 103 6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103 """, ) record = next(records) self.assertEqual(record.num, 7) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "7") self.assertEqual( record.query.description, "gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=2551)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 8994603) self.assertEqual(record.stat["db-len"], -1216159329) self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 1251086325060) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 10) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|27805603|sp|Q9TKP6.1|MATK_WISFR") self.assertEqual(hit.target.name, "Q9TKP6") self.assertEqual( hit.target.description, "RecName: Full=Maturase K; AltName: Full=Intron maturase >gi|5817759|gb|AAD52902.1|AF142731_1 maturase-like protein [Wisteria frutescens]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 2451.0) self.assertAlmostEqual(hsp.annotations["bit score"], 948.732392853477) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 506) self.assertEqual(hsp.annotations["positive"], 506) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 506], [ 0, 506]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 506)) self.assertEqual( repr(hsp.query.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.query.id, "7") self.assertEqual( hsp.query.description, "gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(506))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244") self.assertEqual( repr(hsp.target.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.target.id, "gi|27805603|sp|Q9TKP6.1|MATK_WISFR") self.assertEqual(hsp.target.name, "Q9TKP6") self.assertEqual( hsp.target.description, "RecName: Full=Maturase K; AltName: Full=Intron maturase >gi|5817759|gb|AAD52902.1|AF142731_1 maturase-like protein [Wisteria frutescens]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRLITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIVKSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFFLYHFSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSVFFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRTSSTLQRLHRNRIWYLDILFSNDLVNHE", ) self.assertEqual( str(hsp), """\ Query : 7 Length: 506 Strand: Plus gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product Target: gi|27805603|sp|Q9TKP6.1|MATK_WISFR Length: 506 Strand: Plus RecName: Full=Maturase K; AltName: Full=Intron maturase >gi|5817759|gb|AAD52902.1|AF142731_1 maturase-like protein [Wisteria frutescens] Score:948 bits(2451), Expect:0, Identities:506/506(100%), Positives:506/506(100%), Gaps:0.506(0%) gi|278056 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL gi|278056 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV gi|278056 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFF 120 ||||||||||||||||||||||||||||||||||||||||||||||||||||........ 7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX gi|278056 180 LYHFSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV 180 ....|||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV gi|278056 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY 240 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY gi|278056 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK gi|278056 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS gi|278056 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRTS 420 ||||||||||||||||||||||||||||||||||||....................|||| 7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS gi|278056 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 480 |||||||||||||||||||||||||| 506 7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|171909144|gb|ACB58148.1|") self.assertEqual(hit.target.name, "ACB58148") self.assertEqual(hit.target.description, "maturase K [Wisteria frutescens]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 2445.0) self.assertAlmostEqual(hsp.annotations["bit score"], 946.421195397973) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 505) self.assertEqual(hsp.annotations["positive"], 505) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 506], [ 0, 506]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 506)) self.assertEqual( repr(hsp.query.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.query.id, "7") self.assertEqual( hsp.query.description, "gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(506))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244") self.assertEqual( repr(hsp.target.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKYSL...NHE')", ) self.assertEqual(hsp.target.id, "gi|171909144|gb|ACB58148.1|") self.assertEqual(hsp.target.name, "ACB58148") self.assertEqual(hsp.target.description, "maturase K [Wisteria frutescens]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNK SLLIVKRLITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIVKSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFFLYHFSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSVFFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRTSSTLQRLHRNRIWYLDILFSNDLVNHE", ) self.assertEqual( str(hsp), """\ Query : 7 Length: 506 Strand: Plus gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product Target: gi|171909144|gb|ACB58148.1| Length: 506 Strand: Plus maturase K [Wisteria frutescens] Score:946 bits(2445), Expect:0, Identities:505/506(100%), Positives:505/506(100%), Gaps:0.506(0%) gi|171909 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKYSLLIVKRL 0 |||||||||||||||||||||||||||||||||||||||||||||||||||.|||||||| 7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL gi|171909 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV gi|171909 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFF 120 ||||||||||||||||||||||||||||||||||||||||||||||||||||........ 7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX gi|171909 180 LYHFSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV 180 ....|||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV gi|171909 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY 240 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY gi|171909 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK gi|171909 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS gi|171909 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRTS 420 ||||||||||||||||||||||||||||||||||||....................|||| 7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS gi|171909 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 480 |||||||||||||||||||||||||| 506 7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 """, ) hit = record[2] self.assertEqual(hit.num, 3) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|171909146|gb|ACB58149.1|") self.assertEqual(hit.target.name, "ACB58149") self.assertEqual( hit.target.description, "maturase K [Wisteria frutescens] >gi|171909148|gb|ACB58150.1| maturase K [Wisteria frutescens var. macrostachya]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 2443.0) self.assertAlmostEqual(hsp.annotations["bit score"], 945.650796246138) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 505) self.assertEqual(hsp.annotations["positive"], 505) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 506], [ 0, 506]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 506)) self.assertEqual( repr(hsp.query.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.query.id, "7") self.assertEqual( hsp.query.description, "gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(506))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244") self.assertEqual( repr(hsp.target.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.target.id, "gi|171909146|gb|ACB58149.1|") self.assertEqual(hsp.target.name, "ACB58149") self.assertEqual( hsp.target.description, "maturase K [Wisteria frutescens] >gi|171909148|gb|ACB58150.1| maturase K [Wisteria frutescens var. macrostachya]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRLITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIVKSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFFLYHFSNRNSLITP KSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSVFFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRTSSTLQRLHRNRIWYLDILFSNDLVNHE", ) self.assertEqual( str(hsp), """\ Query : 7 Length: 506 Strand: Plus gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product Target: gi|171909146|gb|ACB58149.1| Length: 506 Strand: Plus maturase K [Wisteria frutescens] >gi|171909148|gb|ACB58150.1| maturase K [Wisteria frutescens var. macrostachya] Score:945 bits(2443), Expect:0, Identities:505/506(100%), Positives:505/506(100%), Gaps:0.506(0%) gi|171909 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL gi|171909 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV gi|171909 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFF 120 ||||||||||||||||||||||||||||||||||||||||||||||||||||........ 7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX gi|171909 180 LYHFSNRNSLITPIKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV 180 ....|||||||||.|||||||||||||||||||||||||||||||||||||||||||||| 7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV gi|171909 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY 240 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY gi|171909 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK gi|171909 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS 360 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS gi|171909 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRTS 420 ||||||||||||||||||||||||||||||||||||....................|||| 7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS gi|171909 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 480 |||||||||||||||||||||||||| 506 7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|171909132|gb|ACB58142.1|") self.assertEqual(hit.target.name, "ACB58142") self.assertEqual(hit.target.description, "maturase K [Callerya megasperma]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 2439.0) self.assertAlmostEqual(hsp.annotations["bit score"], 944.109997942469) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 501) self.assertEqual(hsp.annotations["positive"], 504) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 506], [ 0, 506]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 506)) self.assertEqual( repr(hsp.query.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.query.id, "7") self.assertEqual( hsp.query.description, "gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(506))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244") self.assertEqual( repr(hsp.target.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.target.id, "gi|171909132|gb|ACB58142.1|") self.assertEqual(hsp.target.name, "ACB58142") self.assertEqual(hsp.target.description, "maturase K [Callerya megasperma]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRLITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIVKSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFFLY++ NRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSVFFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHP+SKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPR SSTLQRLHRNRIWYLDILFSNDLVNHE", ) self.assertEqual( str(hsp), """\ Query : 7 Length: 506 Strand: Plus gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product Target: gi|171909132|gb|ACB58142.1| Length: 506 Strand: Plus maturase K [Callerya megasperma] Score:944 bits(2439), Expect:0, Identities:501/506(99%), Positives:504/506(100%), Gaps:0.506(0%) gi|171909 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL gi|171909 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV 60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV gi|171909 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFF 120 ||||||||||||||||||||||||||||||||||||||||||||||||||||........ 7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX gi|171909 180 LYNYCNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV 180 .....||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV gi|171909 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY 240 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY gi|171909 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK gi|171909 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPISKSVWADSSDFDIIDRFLRICRNLSHYYNGSS 360 |||||||||||||||||||||||||||.|||||||||||||||||||||||||||||||| 7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS gi|171909 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRAS 420 ||||||||||||||||||||||||||||||||||||....................||.| 7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS gi|171909 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 480 |||||||||||||||||||||||||| 506 7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|5817760|gb|AAD52903.1|AF142732_1") self.assertEqual(hit.target.name, "AAD52903") self.assertEqual( hit.target.description, "maturase-like protein [Wisteria sinensis] >gi|171909136|gb|ACB58144.1| maturase K [Wisteria brachybotrys] >gi|171909138|gb|ACB58145.1| maturase K [Wisteria floribunda] >gi|171909140|gb|ACB58146.1| maturase K [Wisteria floribunda] >gi|171909142|gb|ACB58147.1| maturase K [Wisteria floribunda] >gi|171909150|gb|ACB58151.1| maturase K [Wisteria sinensis] >gi|171909152|gb|ACB58152.1| maturase K [Wisteria sinensis] >gi|171909154|gb|ACB58153.1| maturase K [Wisteria sinensis] >gi|171909156|gb|ACB58154.1| maturase K [Wisteria villosa] >gi|171909158|gb|ACB58155.1| maturase K [Wisteria villosa] >gi|171909160|gb|ACB58156.1| maturase K [Wisteria villosa]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 2418.0) self.assertAlmostEqual(hsp.annotations["bit score"], 936.020806848204) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 498) self.assertEqual(hsp.annotations["positive"], 500) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 506], [ 0, 506]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 506)) self.assertEqual( repr(hsp.query.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.query.id, "7") self.assertEqual( hsp.query.description, "gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(506))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244") self.assertEqual( repr(hsp.target.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDKKSSL...NHE')", ) self.assertEqual(hsp.target.id, "gi|5817760|gb|AAD52903.1|AF142732_1") self.assertEqual(hsp.target.name, "AAD52903") self.assertEqual( hsp.target.description, "maturase-like protein [Wisteria sinensis] >gi|171909136|gb|ACB58144.1| maturase K [Wisteria brachybotrys] >gi|171909138|gb|ACB58145.1| maturase K [Wisteria floribunda] >gi|171909140|gb|ACB58146.1| maturase K [Wisteria floribunda] >gi|171909142|gb|ACB58147.1| maturase K [Wisteria floribunda] >gi|171909150|gb|ACB58151.1| maturase K [Wisteria sinensis] >gi|171909152|gb|ACB58152.1| maturase K [Wisteria sinensis] >gi|171909154|gb|ACB58153.1| maturase K [Wisteria sinensis] >gi|171909156|gb|ACB58154.1| maturase K [Wisteria villosa] >gi|171909158|gb|ACB58155.1| maturase K [Wisteria villosa] >gi|171909160|gb|ACB58156.1| maturase K [Wisteria villosa]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYD KSSLLIVKRLITRMYQQNHLIISANDSNKNPFLGYN NFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIVKSYHNLRSIHSIFPFLEDK TY NYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFFLY+F NRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSVFFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHP+SKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPR SSTLQRLHRNRIWYLDILFSNDLVNHE", ) self.assertEqual( str(hsp), """\ Query : 7 Length: 506 Strand: Plus gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product Target: gi|5817760|gb|AAD52903.1|AF142732_1 Length: 506 Strand: Plus maturase-like protein [Wisteria sinensis] >gi|171909136|gb|ACB58144.1| maturase K [Wisteria brachybotrys] >gi|171909138|gb|ACB58145.1| maturase K [Wisteria floribunda] >gi|171909140|gb|ACB58146.1| maturase K [Wisteria floribunda] >gi|171909142|gb|ACB58147.1| maturase K [Wisteria floribunda] >gi|171909150|gb|ACB58151.1| maturase K [Wisteria sinensis] >gi|171909152|gb|ACB58152.1| maturase K [Wisteria sinensis] >gi|171909154|gb|ACB58153.1| maturase K [Wisteria sinensis] >gi|171909156|gb|ACB58154.1| maturase K [Wisteria villosa] >gi|171909158|gb|ACB58155.1| maturase K [Wisteria villosa] >gi|171909160|gb|ACB58156.1| maturase K [Wisteria villosa] Score:936 bits(2418), Expect:0, Identities:498/506(98%), Positives:500/506(99%), Gaps:0.506(0%) gi|581776 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDKKSSLLIVKRL 0 |||||||||||||||||||||||||||||||||||||||||||||||||.|||||||||| 7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL gi|581776 60 ITRMYQQNHLIISANDSNKNPFLGYNNNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV 60 ||||||||||||||||||||||||||.||||||||||||||||||||||||||||||||| 7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV gi|581776 120 KSYHNLRSIHSIFPFLEDKLTYFNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFF 120 |||||||||||||||||||.||.|||||||||||||||||||||||||||||........ 7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX gi|581776 180 LYNFCNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV 180 .....||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV gi|581776 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY 240 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY gi|581776 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK gi|581776 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPISKSVWADSSDFDIIDRFLRICRNLSHYYNGSS 360 |||||||||||||||||||||||||||.|||||||||||||||||||||||||||||||| 7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS gi|581776 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRAS 420 ||||||||||||||||||||||||||||||||||||....................||.| 7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS gi|581776 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 480 |||||||||||||||||||||||||| 506 7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 """, ) hit = record[5] self.assertEqual(hit.num, 6) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|171909134|gb|ACB58143.1|") self.assertEqual(hit.target.name, "ACB58143") self.assertEqual(hit.target.description, "maturase K [Wisteria brachybotrys]") self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 2398.0) self.assertAlmostEqual(hsp.annotations["bit score"], 928.316815329857) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 496) self.assertEqual(hsp.annotations["positive"], 498) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 506], [ 0, 506]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 506)) self.assertEqual( repr(hsp.query.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.query.id, "7") self.assertEqual( hsp.query.description, "gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(506))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244") self.assertEqual( repr(hsp.target.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDKKSSL...NHE')", ) self.assertEqual(hsp.target.id, "gi|171909134|gb|ACB58143.1|") self.assertEqual(hsp.target.name, "ACB58143") self.assertEqual(hsp.target.description, "maturase K [Wisteria brachybotrys]") self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYD KSSLLIVKRLITRMYQQNHLIISANDSNKNPFLGYN FYSQIISDGFAVVVEIPFFLQLSSSLEEAEIVKSYHNLRSIHSIFPFLEDK TY NYVSDIRIPYPIHLEILVQILRY VKDASFFHLLRFFLY+F NRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSVFFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHP+SKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPR SSTLQRLHRNRIWYLDILFSNDLVNHE", ) self.assertEqual( str(hsp), """\ Query : 7 Length: 506 Strand: Plus gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product Target: gi|171909134|gb|ACB58143.1| Length: 506 Strand: Plus maturase K [Wisteria brachybotrys] Score:928 bits(2398), Expect:0, Identities:496/506(98%), Positives:498/506(98%), Gaps:0.506(0%) gi|171909 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDKKSSLLIVKRL 0 |||||||||||||||||||||||||||||||||||||||||||||||||.|||||||||| 7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL gi|171909 60 ITRMYQQNHLIISANDSNKNPFLGYNNKFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV 60 ||||||||||||||||||||||||||..|||||||||||||||||||||||||||||||| 7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV gi|171909 120 KSYHNLRSIHSIFPFLEDKLTYFNYVSDIRIPYPIHLEILVQILRYRVKDASFFHLLRFF 120 |||||||||||||||||||.||.|||||||||||||||||||||||.|||||........ 7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX gi|171909 180 LYNFCNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV 180 .....||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV gi|171909 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY 240 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY gi|171909 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK gi|171909 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPISKSVWADSSDFDIIDRFLRICRNLSHYYNGSS 360 |||||||||||||||||||||||||||.|||||||||||||||||||||||||||||||| 7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS gi|171909 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRAS 420 ||||||||||||||||||||||||||||||||||||....................||.| 7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS gi|171909 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 480 |||||||||||||||||||||||||| 506 7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 """, ) hit = record[6] self.assertEqual(hit.num, 7) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|5817761|gb|AAD52904.1|AF142733_1") self.assertEqual(hit.target.name, "AAD52904") self.assertEqual( hit.target.description, "maturase-like protein [Callerya reticulata]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 2390.0) self.assertAlmostEqual(hsp.annotations["bit score"], 925.235218722518) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 493) self.assertEqual(hsp.annotations["positive"], 498) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 506], [ 0, 506]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 506)) self.assertEqual( repr(hsp.query.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.query.id, "7") self.assertEqual( hsp.query.description, "gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(506))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244") self.assertEqual( repr(hsp.target.seq), "Seq('MKEYQAYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.target.id, "gi|5817761|gb|AAD52904.1|AF142733_1") self.assertEqual(hsp.target.name, "AAD52904") self.assertEqual( hsp.target.description, "maturase-like protein [Callerya reticulata]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKEYQ YLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRLITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQL+SSLEEAEIVKSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFFLY+F NRNSLITPKKSISTFSK NPRLFLFLYNFYV EYESIF FLRNQSSHLR KSFSVFFERIFFYAKREHL+KVFPKDFSSTLTFFKDPFIHYVRYQ KSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHP+SKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPR SSTL+RLHRNRIWYLDILFSNDLVNHE", ) self.assertEqual( str(hsp), """\ Query : 7 Length: 506 Strand: Plus gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product Target: gi|5817761|gb|AAD52904.1|AF142733_1 Length: 506 Strand: Plus maturase-like protein [Callerya reticulata] Score:925 bits(2390), Expect:0, Identities:493/506(97%), Positives:498/506(98%), Gaps:0.506(0%) gi|581776 0 MKEYQAYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL 0 |||||.|||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL gi|581776 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLNSSLEEAEIV 60 ||||||||||||||||||||||||||||||||||||||||||||||||||.||||||||| 7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV gi|581776 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFF 120 ||||||||||||||||||||||||||||||||||||||||||||||||||||........ 7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX gi|581776 180 LYNFCNRNSLITPKKSISTFSKCNPRLFLFLYNFYVWEYESIFLFLRNQSSHLRFKSFSV 180 .....|||||||||||||||||.|||||||||||||.||||||.||||||||||.||||| 7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV gi|581776 240 FFERIFFYAKREHLLKVFPKDFSSTLTFFKDPFIHYVRYQEKSILASKNAPLLMNKWKHY 240 ||||||||||||||.|||||||||||||||||||||||||.||||||||||||||||||| 7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY gi|581776 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK gi|581776 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPISKSVWADSSDFDIIDRFLRICRNLSHYYNGSS 360 |||||||||||||||||||||||||||.|||||||||||||||||||||||||||||||| 7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS gi|581776 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRAS 420 ||||||||||||||||||||||||||||||||||||....................||.| 7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS gi|581776 480 STLKRLHRNRIWYLDILFSNDLVNHE 506 480 |||.|||||||||||||||||||||| 506 7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 """, ) hit = record[7] self.assertEqual(hit.num, 8) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|5817762|gb|AAD52905.1|AF142734_1") self.assertEqual(hit.target.name, "AAD52905") self.assertEqual( hit.target.description, "maturase-like protein [Callerya atropurpurea]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 2301.0) self.assertAlmostEqual(hsp.annotations["bit score"], 890.952456465874) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 472) self.assertEqual(hsp.annotations["positive"], 488) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 506], [ 0, 506]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 506)) self.assertEqual( repr(hsp.query.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.query.id, "7") self.assertEqual( hsp.query.description, "gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(506))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244") self.assertEqual( repr(hsp.target.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYTYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.target.id, "gi|5817762|gb|AAD52905.1|AF142734_1") self.assertEqual(hsp.target.name, "AAD52905") self.assertEqual( hsp.target.description, "maturase-like protein [Callerya atropurpurea]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKEYQVYLERDRSRQQDFLYPLIFREY YGLAYSHDFNRSIFVENVGYDNKSSLLIVKRLITRMYQQNHLIIS NDSNKNPFLGYNKNFYSQIIS+ FA+V EIPFF QLSSSLE+AEIVKSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDA FFHLLR FLY+F N N++ TPKKSISTFS+SNPR FLFLYNFYVCEYESIF FLRN+SSHLRLKSFSVFFERIFFYAKREHLV+VF KDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQ FFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTF+IEI IKKLDIIVPIIPLIRSLAKAKFCNVLGHP+SK VWADSSDFDII+RFLRICRNLSHYYNGSSKKK+LYRIKYILRLSCIKTLACKHKSTVRAFLK+ GSEELLEEFFTEEEEILSLIFPR SSTLQ+LHRNRIWYLDILF+NDLVNHE", ) self.assertEqual( str(hsp), """\ Query : 7 Length: 506 Strand: Plus gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product Target: gi|5817762|gb|AAD52905.1|AF142734_1 Length: 506 Strand: Plus maturase-like protein [Callerya atropurpurea] Score:890 bits(2301), Expect:0, Identities:472/506(93%), Positives:488/506(96%), Gaps:0.506(0%) gi|581776 0 MKEYQVYLERDRSRQQDFLYPLIFREYTYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL 0 |||||||||||||||||||||||||||.|||||||||||||||||||||||||||||||| 7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL gi|581776 60 ITRMYQQNHLIISPNDSNKNPFLGYNKNFYSQIISEVFAIVAEIPFFRQLSSSLEQAEIV 60 |||||||||||||.|||||||||||||||||||||..||.|.|||||.|||||||.|||| 7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV gi|581776 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDAPFFHLLRLF 120 |||||||||||||||||||||||||||||||||||||||||||||||||||......... 7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX gi|581776 180 LYNFCNWNTVFTPKKSISTFSRSNPRFFLFLYNFYVCEYESIFLFLRNKSSHLRLKSFSV 180 .....|.|...||||||||||.||||.||||||||||||||||.||||.||||||||||| 7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV gi|581776 240 FFERIFFYAKREHLVEVFAKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY 240 |||||||||||||||.||.||||||||||||||||||||||||||||||||||||||||| 7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY gi|581776 300 FIHLWQSFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFIIEIGIK 300 ||||||.||||||||||||||||||||||||||||||||||||||||||||||.|||.|| 7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK gi|581776 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPISKPVWADSSDFDIIERFLRICRNLSHYYNGSS 360 |||||||||||||||||||||||||||.||.|||||||||||.||||||||||||||||| 7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS gi|581776 420 KKKSLYRIKYILRLSCIKTLACKHKSTVRAFLKRLGSEELLEEFFTEEEEILSLIFPRAS 420 |||.|||||||||||||||||||||||||||||..|....................||.| 7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS gi|581776 480 STLQKLHRNRIWYLDILFTNDLVNHE 506 480 ||||.|||||||||||||.||||||| 506 7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 """, ) hit = record[8] self.assertEqual(hit.num, 9) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|152014012|gb|ABS20107.1|") self.assertEqual(hit.target.name, "ABS20107") self.assertEqual( hit.target.description, "maturase-like protein [Astragalus uliginosus]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 2293.0) self.assertAlmostEqual(hsp.annotations["bit score"], 887.870859858535) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 470) self.assertEqual(hsp.annotations["positive"], 487) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 506], [ 0, 506]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 506)) self.assertEqual( repr(hsp.query.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.query.id, "7") self.assertEqual( hsp.query.description, "gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(506))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244") self.assertEqual( repr(hsp.target.seq), "Seq('MKEYQVFLERDRSRQQDFLYPLIFREYVYGLAYSHDFNRSTFVENVGYDNKYSL...NHE')", ) self.assertEqual(hsp.target.id, "gi|152014012|gb|ABS20107.1|") self.assertEqual(hsp.target.name, "ABS20107") self.assertEqual( hsp.target.description, "maturase-like protein [Astragalus uliginosus]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKEYQV+LERDRSRQQDFLYPLIFREY+YGLAYSHDFNRS FVENVGYDNK SLLIVKRLITRMYQQNHLIISANDS KNPFLGYNKNFYSQIIS+GFA+VVEIPFFLQ SSSL+EAEIVKSY NLRSIHSIFPFLEDKF YLNYVSDIRIPYPIHLEILVQILRYWVKDA FFHLLR FLY+F NRNS +TPKKSISTFSKSNPRLFLFLYNFYVCEYESIF FLR +SSHLRLKSFSVFFERIFFYAKREHLV+VF KDFSSTLTFFKDP IHYVRYQGKSILASKNAPLLMNKWKHYFIHLW+CFFDVWSQPGTIHI QLSEHSF+ LGYFSNVRLNRSVVRSQMLQNTFLIEIV KKLDIIVPIIP+IRSLAKAKFCNVLGHP+SK+VWADSSDFDIIDRFLRICRNLSHYYNGSSKKK+LYRIKYILRLSCIKTLACKHKSTVRAFLK+SGSEELLEEFFTEEEEILSLIFPR SSTLQ+LH NRIWYLDILFSNDLVNHE", ) self.assertEqual( str(hsp), """\ Query : 7 Length: 506 Strand: Plus gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product Target: gi|152014012|gb|ABS20107.1| Length: 506 Strand: Plus maturase-like protein [Astragalus uliginosus] Score:887 bits(2293), Expect:0, Identities:470/506(93%), Positives:487/506(96%), Gaps:0.506(0%) gi|152014 0 MKEYQVFLERDRSRQQDFLYPLIFREYVYGLAYSHDFNRSTFVENVGYDNKYSLLIVKRL 0 ||||||.||||||||||||||||||||.||||||||||||.||||||||||.|||||||| 7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL gi|152014 60 ITRMYQQNHLIISANDSKKNPFLGYNKNFYSQIISEGFAIVVEIPFFLQFSSSLKEAEIV 60 |||||||||||||||||.|||||||||||||||||.|||.|||||||||.||||.||||| 7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV gi|152014 120 KSYKNLRSIHSIFPFLEDKFPYLNYVSDIRIPYPIHLEILVQILRYWVKDAPFFHLLRLF 120 |||.||||||||||||||||.||||||||||||||||||||||||||||||......... 7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX gi|152014 180 LYNFCNRNSFLTPKKSISTFSKSNPRLFLFLYNFYVCEYESIFLFLRKKSSHLRLKSFSV 180 .....||||..||||||||||||||||||||||||||||||||.|||..||||||||||| 7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV gi|152014 240 FFERIFFYAKREHLVEVFAKDFSSTLTFFKDPLIHYVRYQGKSILASKNAPLLMNKWKHY 240 |||||||||||||||.||.|||||||||||||.||||||||||||||||||||||||||| 7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY gi|152014 300 FIHLWECFFDVWSQPGTIHIKQLSEHSFYLLGYFSNVRLNRSVVRSQMLQNTFLIEIVSK 300 |||||.||||||||||||||.|||||||..||||||||||||||||||||||||||||.| 7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK gi|152014 360 KLDIIVPIIPIIRSLAKAKFCNVLGHPISKAVWADSSDFDIIDRFLRICRNLSHYYNGSS 360 ||||||||||.||||||||||||||||.||.||||||||||||||||||||||||||||| 7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS gi|152014 420 KKKSLYRIKYILRLSCIKTLACKHKSTVRAFLKRSGSEELLEEFFTEEEEILSLIFPRAS 420 |||.|||||||||||||||||||||||||||||.||....................||.| 7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS gi|152014 480 STLQKLHGNRIWYLDILFSNDLVNHE 506 480 ||||.||.|||||||||||||||||| 506 7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 """, ) hit = record[9] self.assertEqual(hit.num, 10) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|146197442|dbj|BAF57483.1|") self.assertEqual(hit.target.name, "BAF57483") self.assertEqual( hit.target.description, "maturase [Glycyrrhiza uralensis] >gi|146197444|dbj|BAF57484.1| maturase [Glycyrrhiza glabra] >gi|152014018|gb|ABS20110.1| maturase-like protein [Glycyrrhiza pallidiflora]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 2292.0) self.assertAlmostEqual(hsp.annotations["bit score"], 887.485660282618) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0) self.assertEqual(hsp.annotations["identity"], 471) self.assertEqual(hsp.annotations["positive"], 489) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 506], [ 0, 506]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 506)) self.assertEqual( repr(hsp.query.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')", ) self.assertEqual(hsp.query.id, "7") self.assertEqual( hsp.query.description, "gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(506))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244") self.assertEqual( repr(hsp.target.seq), "Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGIAYSHNLNRSIFVENVGYDNKFSL...NNE')", ) self.assertEqual(hsp.target.id, "gi|146197442|dbj|BAF57483.1|") self.assertEqual(hsp.target.name, "BAF57483") self.assertEqual( hsp.target.description, "maturase [Glycyrrhiza uralensis] >gi|146197444|dbj|BAF57484.1| maturase [Glycyrrhiza glabra] >gi|152014018|gb|ABS20110.1| maturase-like protein [Glycyrrhiza pallidiflora]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MKEYQVYLERDRSRQQDFLYPLIFREYIYG+AYSH+ NRSIFVENVGYDNK SLLIVKRLITRMYQQNHLIISANDSNKNPF GYNKN YSQ+ISDGFAVVVEIPFFLQ SSSLEEAEIVKSY+NLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDA FFHLLR FLY+F N NSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIF FLRN+SSHLRLKSFSVFFERIFFYAKREHLV VF KD+S TLT FKDPFIHYVRYQGK+ILAS+NAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHP+SK VWADSSDF+II+RFLRICRNLSHYY+GSSKKK+LYRIKYILRLSCIKTLACKHKSTVRAFLK+ GSEELLEEFFTEEEEILSLIFP+ SSTLQ+LHRNRIWYLDILFSNDLVN+E", ) self.assertEqual( str(hsp), """\ Query : 7 Length: 506 Strand: Plus gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product Target: gi|146197442|dbj|BAF57483.1| Length: 506 Strand: Plus maturase [Glycyrrhiza uralensis] >gi|146197444|dbj|BAF57484.1| maturase [Glycyrrhiza glabra] >gi|152014018|gb|ABS20110.1| maturase-like protein [Glycyrrhiza pallidiflora] Score:887 bits(2292), Expect:0, Identities:471/506(93%), Positives:489/506(97%), Gaps:0.506(0%) gi|146197 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGIAYSHNLNRSIFVENVGYDNKFSLLIVKRL 0 ||||||||||||||||||||||||||||||.||||..||||||||||||||.|||||||| 7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL gi|146197 60 ITRMYQQNHLIISANDSNKNPFSGYNKNIYSQLISDGFAVVVEIPFFLQFSSSLEEAEIV 60 ||||||||||||||||||||||.|||||.|||.||||||||||||||||.|||||||||| 7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV gi|146197 120 KSYNNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDAPFFHLLRLF 120 |||.|||||||||||||||||||||||||||||||||||||||||||||||......... 7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX gi|146197 180 LYNFCNWNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFLFLRNKSSHLRLKSFSV 180 .....|.||||||||||||||||||||||||||||||||||||.||||.||||||||||| 7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV gi|146197 240 FFERIFFYAKREHLVDVFAKDYSPTLTLFKDPFIHYVRYQGKAILASRNAPLLMNKWKHY 240 |||||||||||||||.||.||.|.|||.||||||||||||||.||||.|||||||||||| 7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY gi|146197 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK 300 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| 7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK gi|146197 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPISKPVWADSSDFEIIERFLRICRNLSHYYSGSS 360 |||||||||||||||||||||||||||.||.||||||||.||.|||||||||||||.||| 7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS gi|146197 420 KKKSLYRIKYILRLSCIKTLACKHKSTVRAFLKRLGSEELLEEFFTEEEEILSLIFPKAS 420 |||.|||||||||||||||||||||||||||||..|....................|..| 7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS gi|146197 480 STLQKLHRNRIWYLDILFSNDLVNNE 506 480 ||||.|||||||||||||||||||.| 506 7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506 """, ) def test_xml_2222_blastx_001_writer(self): """Writing BLASTX 2.2.22+ (xml_2222_blastx_001.xml).""" filename = "xml_2222_blastx_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 7) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_2222_blastx_001(written_records) def test_xml_21500_blastx_001_parser(self): """Parsing BLASTX 2.15.0+ (xml_21500_blastx_001.xml).""" filename = "xml_21500_blastx_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_blastx_001_records(records) with Blast.parse(path) as records: self.check_xml_21500_blastx_001_records(records) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_blastx_001_record(record, xml2=False) record = Blast.read(path) self.check_xml_21500_blastx_001_record(record, xml2=False) with Blast.parse(path) as records: self.assertEqual( str(records), """\ Program: BLASTX 2.15.0+ db: nr Query: AI021773.1 (length=365) MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 1 emb|VDM03167.1| unnamed protein product, partial [Schi... 1 1 gb|EPB74633.1| hypothetical protein ANCCEY_06263 [Ancy... 2 1 ref|XP_009175831.1| hypothetical protein T265_11027 [O... 3 1 gb|EMD49430.1| actin, putative, partial [Entamoeba his... 4 1 emb|CAX83035.1| Actin-2, partial [Schistosoma japonicum] 5 1 emb|VDP83060.1| unnamed protein product, partial [Echi... 6 1 emb|CAA50205.1| actin, partial [Entamoeba histolytica] 7 1 emb|VDN44756.1| unnamed protein product, partial [Dibo... 8 1 ref|XP_027046469.1| actin-1, partial [Pocillopora dami... 9 1 ref|XP_027046487.1| actin-1-like [Pocillopora damicornis]""", ) def check_xml_21500_blastx_001_records(self, records, xml2=False): self.assertEqual(records.program, "blastx") self.assertEqual(records.version, "BLASTX 2.15.0+") self.assertEqual( records.reference, 'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.', ) self.assertEqual(records.db, "nr") if xml2: self.assertEqual(len(records.param), 7) else: self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "AI021773.1") self.assertEqual( records.query.description, "MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence", ) self.assertEqual(repr(records.query.seq), "Seq(None, length=365)") self.assertEqual(len(records.param), 5) self.assertEqual(records.param["matrix"], "BLOSUM62") self.assertAlmostEqual(records.param["expect"], 10.0) self.assertEqual(records.param["gap-open"], 11) self.assertEqual(records.param["gap-extend"], 1) self.assertEqual(records.param["filter"], "F") if xml2: self.assertEqual(records.param["cbs"], 2) self.assertEqual(records.param["query-gencode"], 1) record = next(records) self.assertRaises(StopIteration, next, records) self.check_xml_21500_blastx_001_record(record, xml2) def check_xml_21500_blastx_001_record(self, record, xml2): if not xml2: self.assertEqual(record.num, 1) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "AI021773.1") self.assertEqual( record.query.description, "MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=365)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 718367499) self.assertEqual(record.stat["db-len"], 277248733561) if xml2: self.assertEqual(record.stat["hsp-len"], 89) self.assertEqual(record.stat["eff-space"], 6826048836800) else: self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 0) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 10) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "emb|VDM03167.1|") self.assertEqual(hit.target.name, "VDM03167") self.assertEqual( hit.target.description, "unnamed protein product, partial [Schistocephalus solidus]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=132)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 408.0) self.assertAlmostEqual(hsp.annotations["bit score"], 161.77) self.assertAlmostEqual(hsp.annotations["evalue"], 3.13203e-48, places=53) self.assertEqual(hsp.annotations["identity"], 81) self.assertEqual(hsp.annotations["positive"], 83) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 108], [ 0, 108]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 108)) self.assertEqual( repr(hsp.query.seq), "Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')", ) self.assertEqual(hsp.query.id, "AI021773.1") self.assertEqual( hsp.query.description, "MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(108))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=132)", ) self.assertEqual(hsp.target.id, "emb|VDM03167.1|") self.assertEqual(hsp.target.name, "VDM03167") self.assertEqual( hsp.target.description, "unnamed protein product, partial [Schistocephalus solidus]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MADEEVQALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE", ) self.assertEqual( str(hsp), """\ Query : AI021773.1 Length: 108 Strand: Plus MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence Target: emb|VDM03167.1| Length: 132 Strand: Plus unnamed protein product, partial [Schistocephalus solidus] Score:161 bits(408), Expect:3e-48, Identities:81/108(75%), Positives:83/108(77%), Gaps:0.108(0%) emb|VDM03 0 MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ 0 |||||||||||||||||||||...........|...............|.|||||||||| AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ emb|VDM03 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 60 |||||||||||||||||||||||||||||||||||||||||||||||| 108 AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gb|EPB74633.1|") self.assertEqual(hit.target.name, "EPB74633") self.assertEqual( hit.target.description, "hypothetical protein ANCCEY_06263 [Ancylostoma ceylanicum]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=119)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 405.0) self.assertAlmostEqual(hsp.annotations["bit score"], 160.614) self.assertAlmostEqual(hsp.annotations["evalue"], 5.40441e-48, places=53) self.assertEqual(hsp.annotations["identity"], 81) self.assertEqual(hsp.annotations["positive"], 85) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 115], [ 0, 115]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 115)) self.assertEqual( repr(hsp.query.seq), "Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...RKP')", ) self.assertEqual(hsp.query.id, "AI021773.1") self.assertEqual( hsp.query.description, "MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(115))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..364") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MCDDDVAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LKP'}, length=119)", ) self.assertEqual(hsp.target.id, "gb|EPB74633.1|") self.assertEqual(hsp.target.name, "EPB74633") self.assertEqual( hsp.target.description, "hypothetical protein ANCCEY_06263 [Ancylostoma ceylanicum]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "M D++V ALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE H I KP", ) self.assertEqual( str(hsp), """\ Query : AI021773.1 Length: 115 Strand: Plus MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence Target: gb|EPB74633.1| Length: 119 Strand: Plus hypothetical protein ANCCEY_06263 [Ancylostoma ceylanicum] Score:160 bits(405), Expect:5e-48, Identities:81/115(70%), Positives:85/115(74%), Gaps:0.115(0%) gb|EPB746 0 MCDDDVAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ 0 |.|..|.||||||||||||||...........|...............|.|||||||||| AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ gb|EPB746 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTEAHSILKP 115 60 ||||||||||||||||||||||||||||||||||||||||||||||||.|.|.|| 115 AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTELHCIRKP 115 """, ) hit = record[2] self.assertEqual(hit.num, 3) target = hit.target self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "ref|XP_009175831.1|") self.assertEqual(target.name, "XP_009175831") seq = target.seq self.assertEqual(repr(seq), "Seq(None, length=246)") if xml2: self.assertEqual( target.description, "hypothetical protein T265_11027 [Opisthorchis viverrini]", ) self.assertIs(target, hit.targets[0]) self.assertEqual(len(hit.targets), 2) target = hit.targets[1] self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "gb|KER20427.1|") self.assertEqual(target.name, "KER20427") self.assertIs(target.seq, seq) self.assertEqual( target.description, "hypothetical protein T265_11027 [Opisthorchis viverrini]", ) else: self.assertEqual( target.description, "hypothetical protein T265_11027 [Opisthorchis viverrini] >gb|KER20427.1| hypothetical protein T265_11027 [Opisthorchis viverrini]", ) self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 413.0) self.assertAlmostEqual(hsp.annotations["bit score"], 163.696) self.assertAlmostEqual(hsp.annotations["evalue"], 1.69953e-47, places=52) self.assertEqual(hsp.annotations["identity"], 81) self.assertEqual(hsp.annotations["positive"], 83) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 108], [ 0, 108]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 108)) self.assertEqual( repr(hsp.query.seq), "Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')", ) self.assertEqual(hsp.query.id, "AI021773.1") self.assertEqual( hsp.query.description, "MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(108))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=246)", ) self.assertEqual(hsp.target.id, hit.target.id) self.assertEqual(hsp.target.name, hit.target.name) self.assertEqual(hsp.target.description, hit.target.description) self.assertEqual(len(hit), 1) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MADEEVQALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE", ) if xml2: self.assertEqual( str(hsp), """\ Query : AI021773.1 Length: 108 Strand: Plus MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence Target: ref|XP_009175831.1| Length: 246 Strand: Plus hypothetical protein T265_11027 [Opisthorchis viverrini] Score:163 bits(413), Expect:2e-47, Identities:81/108(75%), Positives:83/108(77%), Gaps:0.108(0%) ref|XP_00 0 MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ 0 |||||||||||||||||||||...........|...............|.|||||||||| AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ ref|XP_00 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 60 |||||||||||||||||||||||||||||||||||||||||||||||| 108 AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 """, ) else: self.assertEqual( str(hsp), """\ Query : AI021773.1 Length: 108 Strand: Plus MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence Target: ref|XP_009175831.1| Length: 246 Strand: Plus hypothetical protein T265_11027 [Opisthorchis viverrini] >gb|KER20427.1| hypothetical protein T265_11027 [Opisthorchis viverrini] Score:163 bits(413), Expect:2e-47, Identities:81/108(75%), Positives:83/108(77%), Gaps:0.108(0%) ref|XP_00 0 MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ 0 |||||||||||||||||||||...........|...............|.|||||||||| AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ ref|XP_00 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 60 |||||||||||||||||||||||||||||||||||||||||||||||| 108 AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gb|EMD49430.1|") self.assertEqual(hit.target.name, "EMD49430") self.assertEqual( hit.target.description, "actin, putative, partial [Entamoeba histolytica KU27]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=124)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 401.0) self.assertAlmostEqual(hsp.annotations["bit score"], 159.073) self.assertAlmostEqual(hsp.annotations["evalue"], 3.49189e-47, places=52) self.assertEqual(hsp.annotations["identity"], 78) self.assertEqual(hsp.annotations["positive"], 81) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 108], [ 0, 108]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 108)) self.assertEqual( repr(hsp.query.seq), "Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')", ) self.assertEqual(hsp.query.id, "AI021773.1") self.assertEqual( hsp.query.description, "MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(108))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MGDEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHVSVMAGMGQKDAY...LTE'}, length=124)", ) self.assertEqual(hsp.target.id, "gb|EMD49430.1|") self.assertEqual(hsp.target.name, "EMD49430") self.assertEqual( hsp.target.description, "actin, putative, partial [Entamoeba histolytica KU27]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "M DEEVQALVVDNGSGMCKAG ++ P G KD+YVGDEAQSKRGILTLKYPIEHGIV NWDDMEKIWHHTFYNELRVAPEEHPVLLTE", ) self.assertEqual( str(hsp), """\ Query : AI021773.1 Length: 108 Strand: Plus MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence Target: gb|EMD49430.1| Length: 124 Strand: Plus actin, putative, partial [Entamoeba histolytica KU27] Score:159 bits(401), Expect:3e-47, Identities:78/108(72%), Positives:81/108(75%), Gaps:0.108(0%) gb|EMD494 0 MGDEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHVSVMAGMGQKDAYVGDEAQ 0 |.|||||||||||||||||||...........|...............|.||.||||||| AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ gb|EMD494 60 SKRGILTLKYPIEHGIVNNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 60 |||||||||||||||||.|||||||||||||||||||||||||||||| 108 AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "emb|CAX83035.1|") self.assertEqual(hit.target.name, "CAX83035") self.assertEqual( hit.target.description, "Actin-2, partial [Schistosoma japonicum]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=252)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 411.0) self.assertAlmostEqual(hsp.annotations["bit score"], 162.925) self.assertAlmostEqual(hsp.annotations["evalue"], 3.86747e-47, places=52) self.assertEqual(hsp.annotations["identity"], 81) self.assertEqual(hsp.annotations["positive"], 83) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 108], [ 0, 108]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 108)) self.assertEqual( repr(hsp.query.seq), "Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')", ) self.assertEqual(hsp.query.id, "AI021773.1") self.assertEqual( hsp.query.description, "MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(108))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=252)", ) self.assertEqual(hsp.target.id, "emb|CAX83035.1|") self.assertEqual(hsp.target.name, "CAX83035") self.assertEqual( hsp.target.description, "Actin-2, partial [Schistosoma japonicum]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MADEEVQALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE", ) self.assertEqual( str(hsp), """\ Query : AI021773.1 Length: 108 Strand: Plus MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence Target: emb|CAX83035.1| Length: 252 Strand: Plus Actin-2, partial [Schistosoma japonicum] Score:162 bits(411), Expect:4e-47, Identities:81/108(75%), Positives:83/108(77%), Gaps:0.108(0%) emb|CAX83 0 MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ 0 |||||||||||||||||||||...........|...............|.|||||||||| AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ emb|CAX83 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 60 |||||||||||||||||||||||||||||||||||||||||||||||| 108 AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 """, ) hit = record[5] self.assertEqual(hit.num, 6) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "emb|VDP83060.1|") self.assertEqual(hit.target.name, "VDP83060") self.assertEqual( hit.target.description, "unnamed protein product, partial [Echinostoma caproni]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=209)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 407.0) self.assertAlmostEqual(hsp.annotations["bit score"], 161.384) self.assertAlmostEqual(hsp.annotations["evalue"], 4.49182e-47, places=52) self.assertEqual(hsp.annotations["identity"], 80) self.assertEqual(hsp.annotations["positive"], 83) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 108], [ 0, 108]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 108)) self.assertEqual( repr(hsp.query.seq), "Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')", ) self.assertEqual(hsp.query.id, "AI021773.1") self.assertEqual( hsp.query.description, "MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(108))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MADDEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=209)", ) self.assertEqual(hsp.target.id, "emb|VDP83060.1|") self.assertEqual(hsp.target.name, "VDP83060") self.assertEqual( hsp.target.description, "unnamed protein product, partial [Echinostoma caproni]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MAD+EVQALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE", ) self.assertEqual( str(hsp), """\ Query : AI021773.1 Length: 108 Strand: Plus MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence Target: emb|VDP83060.1| Length: 209 Strand: Plus unnamed protein product, partial [Echinostoma caproni] Score:161 bits(407), Expect:4e-47, Identities:80/108(74%), Positives:83/108(77%), Gaps:0.108(0%) emb|VDP83 0 MADDEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ 0 |||.|||||||||||||||||...........|...............|.|||||||||| AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ emb|VDP83 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 60 |||||||||||||||||||||||||||||||||||||||||||||||| 108 AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 """, ) hit = record[6] self.assertEqual(hit.num, 7) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "emb|CAA50205.1|") self.assertEqual(hit.target.name, "CAA50205") self.assertEqual( hit.target.description, "actin, partial [Entamoeba histolytica]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=137)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 401.0) self.assertAlmostEqual(hsp.annotations["bit score"], 159.073) self.assertAlmostEqual(hsp.annotations["evalue"], 4.85734e-47, places=52) self.assertEqual(hsp.annotations["identity"], 78) self.assertEqual(hsp.annotations["positive"], 81) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 108], [ 0, 108]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 108)) self.assertEqual( repr(hsp.query.seq), "Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')", ) self.assertEqual(hsp.query.id, "AI021773.1") self.assertEqual( hsp.query.description, "MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(108))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MGDEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHVSVMAGMGQKDAY...LTE'}, length=137)", ) self.assertEqual(hsp.target.id, "emb|CAA50205.1|") self.assertEqual(hsp.target.name, "CAA50205") self.assertEqual( hsp.target.description, "actin, partial [Entamoeba histolytica]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "M DEEVQALVVDNGSGMCKAG ++ P G KD+YVGDEAQSKRGILTLKYPIEHGIV NWDDMEKIWHHTFYNELRVAPEEHPVLLTE", ) self.assertEqual( str(hsp), """\ Query : AI021773.1 Length: 108 Strand: Plus MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence Target: emb|CAA50205.1| Length: 137 Strand: Plus actin, partial [Entamoeba histolytica] Score:159 bits(401), Expect:5e-47, Identities:78/108(72%), Positives:81/108(75%), Gaps:0.108(0%) emb|CAA50 0 MGDEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHVSVMAGMGQKDAYVGDEAQ 0 |.|||||||||||||||||||...........|...............|.||.||||||| AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ emb|CAA50 60 SKRGILTLKYPIEHGIVNNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 60 |||||||||||||||||.|||||||||||||||||||||||||||||| 108 AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 """, ) hit = record[7] self.assertEqual(hit.num, 8) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "emb|VDN44756.1|") self.assertEqual(hit.target.name, "VDN44756") self.assertEqual( hit.target.description, "unnamed protein product, partial [Dibothriocephalus latus]", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=145)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 400.0) self.assertAlmostEqual(hsp.annotations["bit score"], 158.688) self.assertAlmostEqual(hsp.annotations["evalue"], 6.88203e-47, places=52) self.assertEqual(hsp.annotations["identity"], 78) self.assertEqual(hsp.annotations["positive"], 82) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 108], [ 0, 108]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 108)) self.assertEqual( repr(hsp.query.seq), "Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')", ) self.assertEqual(hsp.query.id, "AI021773.1") self.assertEqual( hsp.query.description, "MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(108))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MGDEDVQALVIDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=145)", ) self.assertEqual(hsp.target.id, "emb|VDN44756.1|") self.assertEqual(hsp.target.name, "VDN44756") self.assertEqual( hsp.target.description, "unnamed protein product, partial [Dibothriocephalus latus]", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "M DE+VQALV+DNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE", ) self.assertEqual( str(hsp), """\ Query : AI021773.1 Length: 108 Strand: Plus MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence Target: emb|VDN44756.1| Length: 145 Strand: Plus unnamed protein product, partial [Dibothriocephalus latus] Score:158 bits(400), Expect:7e-47, Identities:78/108(72%), Positives:82/108(76%), Gaps:0.108(0%) emb|VDN44 0 MGDEDVQALVIDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ 0 |.||.|||||.||||||||||...........|...............|.|||||||||| AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ emb|VDN44 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 60 |||||||||||||||||||||||||||||||||||||||||||||||| 108 AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 """, ) hit = record[8] self.assertEqual(hit.num, 9) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|XP_027046469.1|") self.assertEqual(hit.target.name, "XP_027046469") self.assertEqual( hit.target.description, "actin-1, partial [Pocillopora damicornis]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=122)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 398.0) self.assertAlmostEqual(hsp.annotations["bit score"], 157.918) self.assertAlmostEqual(hsp.annotations["evalue"], 7.4607e-47, places=52) self.assertEqual(hsp.annotations["identity"], 78) self.assertEqual(hsp.annotations["positive"], 82) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 108], [ 0, 108]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 108)) self.assertEqual( repr(hsp.query.seq), "Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')", ) self.assertEqual(hsp.query.id, "AI021773.1") self.assertEqual( hsp.query.description, "MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(108))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MADEEVAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=122)", ) self.assertEqual(hsp.target.id, "ref|XP_027046469.1|") self.assertEqual(hsp.target.name, "XP_027046469") self.assertEqual( hsp.target.description, "actin-1, partial [Pocillopora damicornis]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MADEEV ALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELR+APEEHP+LLTE", ) self.assertEqual( str(hsp), """\ Query : AI021773.1 Length: 108 Strand: Plus MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence Target: ref|XP_027046469.1| Length: 122 Strand: Plus actin-1, partial [Pocillopora damicornis] Score:157 bits(398), Expect:7e-47, Identities:78/108(72%), Positives:82/108(76%), Gaps:0.108(0%) ref|XP_02 0 MADEEVAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ 0 ||||||.||||||||||||||...........|...............|.|||||||||| AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ ref|XP_02 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRIAPEEHPILLTE 108 60 ||||||||||||||||||||||||||||||||||||.||||||.|||| 108 AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 """, ) hit = record[9] self.assertEqual(hit.num, 10) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "ref|XP_027046487.1|") self.assertEqual(hit.target.name, "XP_027046487") self.assertEqual( hit.target.description, "actin-1-like [Pocillopora damicornis]" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=134)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 399.0) self.assertAlmostEqual(hsp.annotations["bit score"], 158.303) self.assertAlmostEqual(hsp.annotations["evalue"], 9.11071e-47, places=52) self.assertEqual(hsp.annotations["identity"], 79) self.assertEqual(hsp.annotations["positive"], 82) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 108], [ 0, 108]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 108)) self.assertEqual( repr(hsp.query.seq), "Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')", ) self.assertEqual(hsp.query.id, "AI021773.1") self.assertEqual( hsp.query.description, "MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(108))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343") self.assertEqual( repr(hsp.target.seq), "Seq({0: 'MADEDVAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=134)", ) self.assertEqual(hsp.target.id, "ref|XP_027046487.1|") self.assertEqual(hsp.target.name, "XP_027046487") self.assertEqual( hsp.target.description, "actin-1-like [Pocillopora damicornis]" ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MADE+V ALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE", ) self.assertEqual( str(hsp), """\ Query : AI021773.1 Length: 108 Strand: Plus MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence Target: ref|XP_027046487.1| Length: 134 Strand: Plus actin-1-like [Pocillopora damicornis] Score:158 bits(399), Expect:9e-47, Identities:79/108(73%), Positives:82/108(76%), Gaps:0.108(0%) ref|XP_02 0 MADEDVAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ 0 ||||.|.||||||||||||||...........|...............|.|||||||||| AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ ref|XP_02 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 60 |||||||||||||||||||||||||||||||||||||||||||||||| 108 AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108 """, ) def test_xml2_21500_blastx_001_parser(self): """Parsing BLASTX 2.15.0+ (xml2_21500_blastx_001.xml).""" filename = "xml2_21500_blastx_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_blastx_001_records(records, xml2=True) with Blast.parse(path) as records: self.check_xml_21500_blastx_001_records(records, xml2=True) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_blastx_001_record(record, xml2=True) record = Blast.read(path) self.check_xml_21500_blastx_001_record(record, xml2=True) def test_xml_21500_blastx_001_writer(self): """Writing BLASTX 2.15.0+ (xml_21500_blastx_001.xml).""" filename = "xml_21500_blastx_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_blastx_001_records(written_records, xml2=False) def test_xml2_21500_blastx_001_writer(self): """Writing BLASTX 2.15.0+ XML2 (xml2_21500_blastx_001.xml).""" filename = "xml2_21500_blastx_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream, fmt="XML2") self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_blastx_001_records(written_records, xml2=True) class TestTBlastn(unittest.TestCase): """Test the Blast XML parser for tblastn output.""" def test_xml_21500_tblastn_001_parser(self): """Parsing TBLASTN 2.15.0+ (xml_21500_tblastn_001.xml).""" filename = "xml_21500_tblastn_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_tblastn_001_records(records) with Blast.parse(path) as records: self.check_xml_21500_tblastn_001_records(records) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_tblastn_001_record(record, xml2=False) record = Blast.read(path) self.check_xml_21500_tblastn_001_record(record, xml2=False) with Blast.parse(path) as records: self.assertEqual( str(records), """\ Program: TBLASTN 2.15.0+ db: nt Query: CAJ99216.1 (length=234) tim [Helicobacter acinonychis str. Sheeba] Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 1 gi|109713861|emb|AM260522.1| Helicobacter acinonychis ... 1 1 gi|1336928286|emb|LT900055.1| Helicobacter acinonychis... 2 1 gi|1033012332|gb|CP011486.1| Helicobacter pylori strai... 3 1 gi|2641533851|gb|CP078169.1| Helicobacter pylori strai... 4 1 gi|2641529532|gb|CP078166.1| Helicobacter pylori strai... 5 1 gi|1033009499|gb|CP011484.1| Helicobacter pylori strai... 6 1 gi|317010283|gb|CP002336.1| Helicobacter pylori SouthA... 7 1 gi|2641538240|gb|CP078172.1| Helicobacter pylori strai... 8 1 gi|2640367186|gb|CP079077.1| Helicobacter pylori strai... 9 1 gi|532105813|gb|CP006691.1| Helicobacter pylori SouthA...""", ) def test_xml2_21500_tblastn_001_parser(self): """Parsing TBLASTN 2.15.0+ (xml2_21500_tblastn_001.xml).""" filename = "xml2_21500_tblastn_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_tblastn_001_records(records, xml2=True) with Blast.parse(path) as records: self.check_xml_21500_tblastn_001_records(records, xml2=True) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_tblastn_001_record(record, xml2=True) record = Blast.read(path) self.check_xml_21500_tblastn_001_record(record, xml2=True) def check_xml_21500_tblastn_001_records(self, records, xml2=False): self.assertEqual(records.program, "tblastn") self.assertEqual(records.version, "TBLASTN 2.15.0+") self.assertEqual( records.reference, 'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.', ) self.assertEqual(records.db, "nt") if xml2: self.assertEqual(len(records.param), 7) else: self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "CAJ99216.1") self.assertEqual( records.query.description, "tim [Helicobacter acinonychis str. Sheeba]" ) self.assertEqual(repr(records.query.seq), "Seq(None, length=234)") self.assertEqual(len(records.param), 5) self.assertEqual(records.param["matrix"], "BLOSUM62") self.assertAlmostEqual(records.param["expect"], 10.0) self.assertEqual(records.param["gap-open"], 11) self.assertEqual(records.param["gap-extend"], 1) self.assertEqual(records.param["filter"], "L;") if xml2: self.assertEqual(records.param["cbs"], 2) self.assertEqual(records.param["db-gencode"], 1) record = next(records) self.assertRaises(StopIteration, next, records) self.check_xml_21500_tblastn_001_record(record, xml2) def check_xml_21500_tblastn_001_record(self, record, xml2): if not xml2: self.assertEqual(record.num, 1) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "CAJ99216.1") self.assertEqual( record.query.description, "tim [Helicobacter acinonychis str. Sheeba]" ) self.assertEqual(repr(record.query.seq), "Seq(None, length=234)") if xml2: self.assertEqual(len(record.query.features), 1) feature = record.query.features[0] self.assertEqual(feature.type, "masking") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(101), ExactPosition(116))" ) self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 104338276) self.assertEqual(record.stat["db-len"], 348058846) if xml2: self.assertEqual(record.stat["hsp-len"], 168) self.assertEqual(record.stat["eff-space"], 34567702523838) else: self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 0) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 10) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|109713861|emb|AM260522.1|") self.assertEqual(hit.target.name, "AM260522") self.assertEqual( hit.target.description, "Helicobacter acinonychis str. Sheeba complete genome, strain Sheeba", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1553927)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1137.0) self.assertAlmostEqual(hsp.annotations["bit score"], 442.58) self.assertAlmostEqual(hsp.annotations["evalue"], 1.28996e-138, places=143) self.assertEqual(hsp.annotations["identity"], 234) self.assertEqual(hsp.annotations["positive"], 234) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 234], [ 0, 234]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 234)) self.assertEqual( repr(hsp.query.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.query.id, "CAJ99216.1") self.assertEqual( hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.target.id, "gi|109713861|emb|AM260522.1|") self.assertEqual(hsp.target.name, "AM260522") self.assertEqual( hsp.target.description, "Helicobacter acinonychis str. Sheeba complete genome, strain Sheeba", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(1553927))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "gi|109713861|emb|AM260522.1|:325802..326503", ) self.assertEqual( hsp.annotations["midline"], "MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQNAYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNFLKEKFDFFKDKKFKIVYCIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHGFLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL", ) if xml2: # xml2 is True self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|109713861|emb|AM260522.1| Length: 234 Strand: Plus Helicobacter acinonychis str. Sheeba complete genome, strain Sheeba Score:442 bits(1137), Expect:1e-138, Identities:234/234(100%), Positives:234/234(100%), Gaps:0.234(0%) gi|109713 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|109713 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNFLKEKFDFFKDKKFKIVY 60 ||||||||||||||||||||||||||||||||||||||||||...............||| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY gi|109713 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|109713 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 180 |||||||||||||||||||||||||||||||||||||||||||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) else: # xml2 is False self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|109713861|emb|AM260522.1| Length: 234 Strand: Plus Helicobacter acinonychis str. Sheeba complete genome, strain Sheeba Score:442 bits(1137), Expect:1e-138, Identities:234/234(100%), Positives:234/234(100%), Gaps:0.234(0%) gi|109713 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|109713 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNFLKEKFDFFKDKKFKIVY 60 ||||||||||||||||||||||||||||||||||||||||||...............||| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY gi|109713 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|109713 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 180 |||||||||||||||||||||||||||||||||||||||||||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|1336928286|emb|LT900055.1|") self.assertEqual(hit.target.name, "LT900055") self.assertEqual( hit.target.description, "Helicobacter acinonychis isolate 212_9 genome assembly, chromosome: I", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1550239)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1128.0) self.assertAlmostEqual(hsp.annotations["bit score"], 439.113) self.assertAlmostEqual(hsp.annotations["evalue"], 2.05117e-137, places=142) self.assertEqual(hsp.annotations["identity"], 232) self.assertEqual(hsp.annotations["positive"], 232) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 234], [ 0, 234]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 234)) self.assertEqual( repr(hsp.query.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.query.id, "CAJ99216.1") self.assertEqual( hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.target.id, "gi|1336928286|emb|LT900055.1|") self.assertEqual(hsp.target.name, "LT900055") self.assertEqual( hsp.target.description, "Helicobacter acinonychis isolate 212_9 genome assembly, chromosome: I", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(1550239))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "gi|1336928286|emb|LT900055.1|:325704..326405", ) self.assertEqual( hsp.annotations["midline"], "MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQNAYPKDCGAFTGEITSKHLEELKINTLLIGHSERR LLKESPNFLKEKFDFFKDKKFKIVYCIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHGFLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGS SLELENFKTIISFL", ) if xml2: # xml2 is True self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|1336928286|emb|LT900055.1| Length: 234 Strand: Plus Helicobacter acinonychis isolate 212_9 genome assembly, chromosome: I Score:439 bits(1128), Expect:2e-137, Identities:232/234(99%), Positives:232/234(99%), Gaps:0.234(0%) gi|133692 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|133692 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRALLKESPNFLKEKFDFFKDKKFKIVY 60 ||||||||||||||||||||||||||||||||||.|||||||...............||| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY gi|133692 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|133692 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSASLELENFKTIISFL 234 180 |||||||||||||||||||||||||||||||||||||||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) else: # xml2 is False self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|1336928286|emb|LT900055.1| Length: 234 Strand: Plus Helicobacter acinonychis isolate 212_9 genome assembly, chromosome: I Score:439 bits(1128), Expect:2e-137, Identities:232/234(99%), Positives:232/234(99%), Gaps:0.234(0%) gi|133692 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|133692 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRALLKESPNFLKEKFDFFKDKKFKIVY 60 ||||||||||||||||||||||||||||||||||.|||||||...............||| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY gi|133692 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|133692 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSASLELENFKTIISFL 234 180 |||||||||||||||||||||||||||||||||||||||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) hit = record[2] self.assertEqual(hit.num, 3) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|1033012332|gb|CP011486.1|") self.assertEqual(hit.target.name, "CP011486") self.assertEqual( hit.target.description, "Helicobacter pylori strain K26A1, complete genome" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1570310)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1076.0) self.assertAlmostEqual(hsp.annotations["bit score"], 419.083) self.assertAlmostEqual(hsp.annotations["evalue"], 2.02413e-130, places=135) self.assertEqual(hsp.annotations["identity"], 221) self.assertEqual(hsp.annotations["positive"], 224) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 234], [ 0, 234]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 234)) self.assertEqual( repr(hsp.query.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.query.id, "CAJ99216.1") self.assertEqual( hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.target.id, "gi|1033012332|gb|CP011486.1|") self.assertEqual(hsp.target.name, "CP011486") self.assertEqual( hsp.target.description, "Helicobacter pylori strain K26A1, complete genome" ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(1570310))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "gi|1033012332|gb|CP011486.1|:196043..196744", ) self.assertEqual( hsp.annotations["midline"], "MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLG QNAYPKDCGAFTGEITSKHLEELKINTLLIGHSERR LLKESP+FLKEKFDFFKDK FKI+YCIGEDLKTREKGL AVKEFLNEQLENIDL Y NLIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVNTQNAKEILGIDSVDGLL+GS SLELENFKTIISFL", ) if xml2: # xml2 is True self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|1033012332|gb|CP011486.1| Length: 234 Strand: Plus Helicobacter pylori strain K26A1, complete genome Score:419 bits(1076), Expect:2e-130, Identities:221/234(94%), Positives:224/234(96%), Gaps:0.234(0%) gi|103301 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|103301 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRTLLKESPSFLKEKFDFFKDKNFKIIY 60 ||||||||||||||||||||||||||||||||||.||||||................|.| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY gi|103301 120 CIGEDLKTREKGLAAVKEFLNEQLENIDLSYHNLIVAYEPIWAIGTKKSASLEDIYLTHG 120 |||||||||||||.|||||||||||||||.|.||||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|103301 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234 180 ||||.||||.||||||||||||||||||||||||||.||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) else: # xml2 is False self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|1033012332|gb|CP011486.1| Length: 234 Strand: Plus Helicobacter pylori strain K26A1, complete genome Score:419 bits(1076), Expect:2e-130, Identities:221/234(94%), Positives:224/234(96%), Gaps:0.234(0%) gi|103301 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|103301 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRTLLKESPSFLKEKFDFFKDKNFKIIY 60 ||||||||||||||||||||||||||||||||||.||||||................|.| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY gi|103301 120 CIGEDLKTREKGLAAVKEFLNEQLENIDLSYHNLIVAYEPIWAIGTKKSASLEDIYLTHG 120 |||||||||||||.|||||||||||||||.|.||||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|103301 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234 180 ||||.||||.||||||||||||||||||||||||||.||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|2641533851|gb|CP078169.1|") self.assertEqual(hit.target.name, "CP078169") self.assertEqual( hit.target.description, "Helicobacter pylori strain HpGP-ZAF-006 chromosome, complete genome", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1641225)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1069.0) self.assertAlmostEqual(hsp.annotations["bit score"], 416.387) self.assertAlmostEqual(hsp.annotations["evalue"], 1.58851e-129, places=134) self.assertEqual(hsp.annotations["identity"], 221) self.assertEqual(hsp.annotations["positive"], 224) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 234], [ 0, 234]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 234)) self.assertEqual( repr(hsp.query.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.query.id, "CAJ99216.1") self.assertEqual( hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.target.id, "gi|2641533851|gb|CP078169.1|") self.assertEqual(hsp.target.name, "CP078169") self.assertEqual( hsp.target.description, "Helicobacter pylori strain HpGP-ZAF-006 chromosome, complete genome", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(1641225))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "gi|2641533851|gb|CP078169.1|:260420..261121", ) self.assertEqual( hsp.annotations["midline"], "MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCD+VFVFPDFLGLLPNAFLHFTLG QNAYPKDCGAFTGEITSKHLEELKI+TLLIGHSERR LLKESP+FLKEKFDFFKDK FKIVYCIGEDLKTREKGLGAVKEFLNEQLENIDL Y NLIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVN QNAKEILGIDSVDGLLIGS SLELENFKTIISFL", ) if xml2: # xml2 is True self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|2641533851|gb|CP078169.1| Length: 234 Strand: Plus Helicobacter pylori strain HpGP-ZAF-006 chromosome, complete genome Score:416 bits(1069), Expect:2e-129, Identities:221/234(94%), Positives:224/234(96%), Gaps:0.234(0%) gi|264153 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|264153 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRALLKESPSFLKEKFDFFKDKNFKIVY 60 |||||||||||||||||||||||.||||||||||.||||||................||| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY gi|264153 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHNLIVAYEPIWAIGTKKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||.|.||||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|264153 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLIGSASLELENFKTIISFL 234 180 ||||.||||.|||||||||.|||||||||||||||||||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) else: # xml2 is False self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|2641533851|gb|CP078169.1| Length: 234 Strand: Plus Helicobacter pylori strain HpGP-ZAF-006 chromosome, complete genome Score:416 bits(1069), Expect:2e-129, Identities:221/234(94%), Positives:224/234(96%), Gaps:0.234(0%) gi|264153 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|264153 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRALLKESPSFLKEKFDFFKDKNFKIVY 60 |||||||||||||||||||||||.||||||||||.||||||................||| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY gi|264153 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHNLIVAYEPIWAIGTKKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||.|.||||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|264153 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLIGSASLELENFKTIISFL 234 180 ||||.||||.|||||||||.|||||||||||||||||||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|2641529532|gb|CP078166.1|") self.assertEqual(hit.target.name, "CP078166") self.assertEqual( hit.target.description, "Helicobacter pylori strain HpGP-ZAF-009 chromosome, complete genome", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1681930)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1067.0) self.assertAlmostEqual(hsp.annotations["bit score"], 415.616) self.assertAlmostEqual(hsp.annotations["evalue"], 3.21842e-129, places=134) self.assertEqual(hsp.annotations["identity"], 220) self.assertEqual(hsp.annotations["positive"], 226) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 234], [ 0, 234]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 234)) self.assertEqual( repr(hsp.query.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.query.id, "CAJ99216.1") self.assertEqual( hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.target.id, "gi|2641529532|gb|CP078166.1|") self.assertEqual(hsp.target.name, "CP078166") self.assertEqual( hsp.target.description, "Helicobacter pylori strain HpGP-ZAF-009 chromosome, complete genome", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(1681930))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "gi|2641529532|gb|CP078166.1|:198327..199028", ) self.assertEqual( hsp.annotations["midline"], "MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCD+VFVFPDFLGLLPNAFLHFTLG QNAYPKDCGAFTGEITSKHLEELKI+TLLIGHSERR LLKESP+FLKEKFDFFKDKKFKI+YCIGEDLKTREKGLGAVKEFLNEQLENIDL Y +LIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVNTQNAKEILGIDSVDGLL+GS SLELENFKTIISFL", ) if xml2: # xml2 is True self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|2641529532|gb|CP078166.1| Length: 234 Strand: Plus Helicobacter pylori strain HpGP-ZAF-009 chromosome, complete genome Score:415 bits(1067), Expect:3e-129, Identities:220/234(94%), Positives:226/234(97%), Gaps:0.234(0%) gi|264152 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|264152 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRTLLKESPSFLKEKFDFFKDKKFKIIY 60 |||||||||||||||||||||||.||||||||||.||||||................|.| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY gi|264152 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHHLIVAYEPIWAIGTKKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||.|..|||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|264152 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234 180 ||||.||||.||||||||||||||||||||||||||.||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) else: # xml2 is False self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|2641529532|gb|CP078166.1| Length: 234 Strand: Plus Helicobacter pylori strain HpGP-ZAF-009 chromosome, complete genome Score:415 bits(1067), Expect:3e-129, Identities:220/234(94%), Positives:226/234(97%), Gaps:0.234(0%) gi|264152 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|264152 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRTLLKESPSFLKEKFDFFKDKKFKIIY 60 |||||||||||||||||||||||.||||||||||.||||||................|.| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY gi|264152 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHHLIVAYEPIWAIGTKKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||.|..|||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|264152 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234 180 ||||.||||.||||||||||||||||||||||||||.||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) hit = record[5] self.assertEqual(hit.num, 6) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|1033009499|gb|CP011484.1|") self.assertEqual(hit.target.name, "CP011484") self.assertEqual( hit.target.description, "Helicobacter pylori strain CC33C, complete genome" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1659899)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1066.0) self.assertAlmostEqual(hsp.annotations["bit score"], 415.231) self.assertAlmostEqual(hsp.annotations["evalue"], 4.05935e-129, places=134) self.assertEqual(hsp.annotations["identity"], 220) self.assertEqual(hsp.annotations["positive"], 227) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 234], [ 0, 234]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 234)) self.assertEqual( repr(hsp.query.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.query.id, "CAJ99216.1") self.assertEqual( hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTKIAMANFKSSMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.target.id, "gi|1033009499|gb|CP011484.1|") self.assertEqual(hsp.target.name, "CP011484") self.assertEqual( hsp.target.description, "Helicobacter pylori strain CC33C, complete genome" ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(1659899))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "gi|1033009499|gb|CP011484.1|:248047..248748", ) self.assertEqual( hsp.annotations["midline"], "MTKIAMANFKS+MPIFKSHAYLKELEKTLKPQHCD+VFVFPDFLGLLPNAFLHFTLG QNAYPKDCGAFTGEITSKHLEELKI+TLLIGHSERRVLLKESP+FLKEKFDFFKDKKFKIVYCIGEDLKTREKGLGAVKEFLNEQLENIDL+Y +LIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVN QNAKEILGIDSVDGLL+GS SLELENFKTIISFL", ) if xml2: # xml2 is True self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|1033009499|gb|CP011484.1| Length: 234 Strand: Plus Helicobacter pylori strain CC33C, complete genome Score:415 bits(1066), Expect:4e-129, Identities:220/234(94%), Positives:227/234(97%), Gaps:0.234(0%) gi|103300 0 MTKIAMANFKSSMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||.|||||||||||||||||||||||.|||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|103300 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRVLLKESPSFLKEKFDFFKDKKFKIVY 60 |||||||||||||||||||||||.|||||||||||||||||................||| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY gi|103300 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLNYHHLIVAYEPIWAIGTKKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||.|..|||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|103300 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234 180 ||||.||||.|||||||||.||||||||||||||||.||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) else: # xml2 is False self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|1033009499|gb|CP011484.1| Length: 234 Strand: Plus Helicobacter pylori strain CC33C, complete genome Score:415 bits(1066), Expect:4e-129, Identities:220/234(94%), Positives:227/234(97%), Gaps:0.234(0%) gi|103300 0 MTKIAMANFKSSMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||.|||||||||||||||||||||||.|||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|103300 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRVLLKESPSFLKEKFDFFKDKKFKIVY 60 |||||||||||||||||||||||.|||||||||||||||||................||| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY gi|103300 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLNYHHLIVAYEPIWAIGTKKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||.|..|||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|103300 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234 180 ||||.||||.|||||||||.||||||||||||||||.||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) hit = record[6] self.assertEqual(hit.num, 7) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|317010283|gb|CP002336.1|") self.assertEqual(hit.target.name, "CP002336") self.assertEqual( hit.target.description, "Helicobacter pylori SouthAfrica7, complete genome", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1653913)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1066.0) self.assertAlmostEqual(hsp.annotations["bit score"], 415.231) self.assertAlmostEqual(hsp.annotations["evalue"], 4.73879e-129, places=134) self.assertEqual(hsp.annotations["identity"], 221) self.assertEqual(hsp.annotations["positive"], 226) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 234], [ 0, 234]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 234)) self.assertEqual( repr(hsp.query.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.query.id, "CAJ99216.1") self.assertEqual( hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.target.id, "gi|317010283|gb|CP002336.1|") self.assertEqual(hsp.target.name, "CP002336") self.assertEqual( hsp.target.description, "Helicobacter pylori SouthAfrica7, complete genome", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(1653913))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "gi|317010283|gb|CP002336.1|:194539..195240", ) self.assertEqual( hsp.annotations["midline"], "MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCD+VFVFPDFLGLLPNAFLHFTLG QNAYPKD GAFTGEITSKHLEELKINTLLIGHSERRVLLKESP+FLKEKFDFFKDKKFKI+YCIGEDLKTREKGLGAVKEFLNEQLENIDL Y +LIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVNTQNAKEILGIDSVDGLL+GS SLELENFKTIISFL", ) if xml2: # xml2 is True self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|317010283|gb|CP002336.1| Length: 234 Strand: Plus Helicobacter pylori SouthAfrica7, complete genome Score:415 bits(1066), Expect:5e-129, Identities:221/234(94%), Positives:226/234(97%), Gaps:0.234(0%) gi|317010 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|317010 60 AYPKDYGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPSFLKEKFDFFKDKKFKIIY 60 |||||.|||||||||||||||||||||||||||||||||||................|.| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY gi|317010 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHHLIVAYEPIWAIGTKKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||.|..|||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|317010 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234 180 ||||.||||.||||||||||||||||||||||||||.||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) else: # xml2 is False self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|317010283|gb|CP002336.1| Length: 234 Strand: Plus Helicobacter pylori SouthAfrica7, complete genome Score:415 bits(1066), Expect:5e-129, Identities:221/234(94%), Positives:226/234(97%), Gaps:0.234(0%) gi|317010 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|317010 60 AYPKDYGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPSFLKEKFDFFKDKKFKIIY 60 |||||.|||||||||||||||||||||||||||||||||||................|.| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY gi|317010 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHHLIVAYEPIWAIGTKKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||.|..|||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|317010 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234 180 ||||.||||.||||||||||||||||||||||||||.||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) hit = record[7] self.assertEqual(hit.num, 8) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|2641538240|gb|CP078172.1|") self.assertEqual(hit.target.name, "CP078172") self.assertEqual( hit.target.description, "Helicobacter pylori strain HpGP-ZAF-001 chromosome, complete genome", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1714499)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1062.0) self.assertAlmostEqual(hsp.annotations["bit score"], 413.69) self.assertAlmostEqual(hsp.annotations["evalue"], 1.37323e-128, places=133) self.assertEqual(hsp.annotations["identity"], 219) self.assertEqual(hsp.annotations["positive"], 225) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 234], [ 0, 234]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 234)) self.assertEqual( repr(hsp.query.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.query.id, "CAJ99216.1") self.assertEqual( hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.target.id, "gi|2641538240|gb|CP078172.1|") self.assertEqual(hsp.target.name, "CP078172") self.assertEqual( hsp.target.description, "Helicobacter pylori strain HpGP-ZAF-001 chromosome, complete genome", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(1714499))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "complement(gi|2641538240|gb|CP078172.1|:1021670..1022371)", ) self.assertEqual( hsp.annotations["midline"], "MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCD+VFVFPDFLGLLPNAFLHFTLG QNAYPKDCGAFTGEITSKHLEELKI+TLLIGHSERR LLKESP+FLKEKFDFFKDKKFKI+YCIGEDLKTREKGLGAVKEFLNEQLENIDL Y +LIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVN QNAKEILGIDSVDGLL+GS SLELENFKTIISFL", ) if xml2: # xml2 is True self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|2641538240|gb|CP078172.1| Length: 234 Strand: Plus Helicobacter pylori strain HpGP-ZAF-001 chromosome, complete genome Score:413 bits(1062), Expect:1e-128, Identities:219/234(94%), Positives:225/234(96%), Gaps:0.234(0%) gi|264153 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|264153 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRTLLKESPSFLKEKFDFFKDKKFKIIY 60 |||||||||||||||||||||||.||||||||||.||||||................|.| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY gi|264153 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHHLIVAYEPIWAIGTKKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||.|..|||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|264153 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234 180 ||||.||||.|||||||||.||||||||||||||||.||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) else: # xml2 is False self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|2641538240|gb|CP078172.1| Length: 234 Strand: Plus Helicobacter pylori strain HpGP-ZAF-001 chromosome, complete genome Score:413 bits(1062), Expect:1e-128, Identities:219/234(94%), Positives:225/234(96%), Gaps:0.234(0%) gi|264153 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|264153 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRTLLKESPSFLKEKFDFFKDKKFKIIY 60 |||||||||||||||||||||||.||||||||||.||||||................|.| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY gi|264153 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHHLIVAYEPIWAIGTKKSASLEDIYLTHG 120 |||||||||||||||||||||||||||||.|..|||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|264153 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234 180 ||||.||||.|||||||||.||||||||||||||||.||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) hit = record[8] self.assertEqual(hit.num, 9) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|2640367186|gb|CP079077.1|") self.assertEqual(hit.target.name, "CP079077") self.assertEqual( hit.target.description, "Helicobacter pylori strain HpGP-ARG-001 chromosome, complete genome", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1624657)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1059.0) self.assertAlmostEqual(hsp.annotations["bit score"], 412.535) self.assertAlmostEqual(hsp.annotations["evalue"], 3.5777e-128, places=132) self.assertEqual(hsp.annotations["identity"], 220) self.assertEqual(hsp.annotations["positive"], 225) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 234], [ 0, 234]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 234)) self.assertEqual( repr(hsp.query.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.query.id, "CAJ99216.1") self.assertEqual( hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHFDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.target.id, "gi|2640367186|gb|CP079077.1|") self.assertEqual(hsp.target.name, "CP079077") self.assertEqual( hsp.target.description, "Helicobacter pylori strain HpGP-ARG-001 chromosome, complete genome", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(1624657))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "gi|2640367186|gb|CP079077.1|:197481..198182", ) self.assertEqual( hsp.annotations["midline"], "MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQH DRVFVFPDFLGLLPNAFLHFTLGVQNAYP+DCGAFTGEITSKHLEELKI+TLLIGHSERRVLLKESP+FLKEKFDFFKDK FKIVYCIGEDLKTREKG AVKEFL+EQLENIDL+Y NLIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVNTQNAKEILGIDSVDGLLIGS SLELENFKTIISFL", ) if xml2: # xml2 is True self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|2640367186|gb|CP079077.1| Length: 234 Strand: Plus Helicobacter pylori strain HpGP-ARG-001 chromosome, complete genome Score:412 bits(1059), Expect:4e-128, Identities:220/234(94%), Positives:225/234(96%), Gaps:0.234(0%) gi|264036 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHFDRVFVFPDFLGLLPNAFLHFTLGVQN 0 |||||||||||||||||||||||||||||||||.|||||||||||||||||||||||||| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|264036 60 AYPRDCGAFTGEITSKHLEELKIHTLLIGHSERRVLLKESPSFLKEKFDFFKDKNFKIVY 60 |||.|||||||||||||||||||.|||||||||||||||||................||| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY gi|264036 120 CIGEDLKTREKGFKAVKEFLSEQLENIDLNYSNLIVAYEPIWAIGTKKSASLEDIYLTHG 120 ||||||||||||..||||||.||||||||.|.||||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|264036 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLIGSASLELENFKTIISFL 234 180 ||||.||||.|||||||||||||||||||||||||||||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) else: self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|2640367186|gb|CP079077.1| Length: 234 Strand: Plus Helicobacter pylori strain HpGP-ARG-001 chromosome, complete genome Score:412 bits(1059), Expect:4e-128, Identities:220/234(94%), Positives:225/234(96%), Gaps:0.234(0%) gi|264036 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHFDRVFVFPDFLGLLPNAFLHFTLGVQN 0 |||||||||||||||||||||||||||||||||.|||||||||||||||||||||||||| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|264036 60 AYPRDCGAFTGEITSKHLEELKIHTLLIGHSERRVLLKESPSFLKEKFDFFKDKNFKIVY 60 |||.|||||||||||||||||||.|||||||||||||||||................||| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY gi|264036 120 CIGEDLKTREKGFKAVKEFLSEQLENIDLNYSNLIVAYEPIWAIGTKKSASLEDIYLTHG 120 ||||||||||||..||||||.||||||||.|.||||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|264036 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLIGSASLELENFKTIISFL 234 180 ||||.||||.|||||||||||||||||||||||||||||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) hit = record[9] self.assertEqual(hit.num, 10) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|532105813|gb|CP006691.1|") self.assertEqual(hit.target.name, "CP006691") self.assertEqual( hit.target.description, "Helicobacter pylori SouthAfrica20, complete genome" ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1622903)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 1057.0) self.assertAlmostEqual(hsp.annotations["bit score"], 411.764) self.assertAlmostEqual(hsp.annotations["evalue"], 7.68167e-128, places=133) self.assertEqual(hsp.annotations["identity"], 218) self.assertEqual(hsp.annotations["positive"], 225) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 234], [ 0, 234]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 234)) self.assertEqual( repr(hsp.query.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.query.id, "CAJ99216.1") self.assertEqual( hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]" ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHF...SFL')", ) self.assertEqual(hsp.target.id, "gi|532105813|gb|CP006691.1|") self.assertEqual(hsp.target.name, "CP006691") self.assertEqual( hsp.target.description, "Helicobacter pylori SouthAfrica20, complete genome" ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(1622903))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "gi|532105813|gb|CP006691.1|:197836..198537", ) self.assertEqual( hsp.annotations["midline"], "MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCD+VFVFPDFLGLLPNAFLHFTLG QNAYPKDCGAFTGEITSKHLEELKI+TLLIGHSERRVLLKESP+FLKEKFDFFKDKKFKI+YCIGEDLKTREKG AVKEFLNEQLENIDL+Y +LIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVN QNAKEILGIDSVDGLL+GS SLELENFKTIISFL", ) if xml2: # xml2 is True self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|532105813|gb|CP006691.1| Length: 234 Strand: Plus Helicobacter pylori SouthAfrica20, complete genome Score:411 bits(1057), Expect:8e-128, Identities:218/234(93%), Positives:225/234(96%), Gaps:0.234(0%) gi|532105 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|532105 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRVLLKESPSFLKEKFDFFKDKKFKIIY 60 |||||||||||||||||||||||.|||||||||||||||||................|.| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY gi|532105 120 CIGEDLKTREKGFNAVKEFLNEQLENIDLNYHHLIVAYEPIWAIGTKKSASLEDIYLTHG 120 ||||||||||||..|||||||||||||||.|..|||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|532105 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234 180 ||||.||||.|||||||||.||||||||||||||||.||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) else: self.assertEqual( str(hsp), """\ Query : CAJ99216.1 Length: 234 Strand: Plus tim [Helicobacter acinonychis str. Sheeba] Target: gi|532105813|gb|CP006691.1| Length: 234 Strand: Plus Helicobacter pylori SouthAfrica20, complete genome Score:411 bits(1057), Expect:8e-128, Identities:218/234(93%), Positives:225/234(96%), Gaps:0.234(0%) gi|532105 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN 0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.|| CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN gi|532105 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRVLLKESPSFLKEKFDFFKDKKFKIIY 60 |||||||||||||||||||||||.|||||||||||||||||................|.| CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY gi|532105 120 CIGEDLKTREKGFNAVKEFLNEQLENIDLNYHHLIVAYEPIWAIGTKKSASLEDIYLTHG 120 ||||||||||||..|||||||||||||||.|..|||||||||||||.||||||||||||| CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG gi|532105 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234 180 ||||.||||.|||||||||.||||||||||||||||.||.|||||||||||||| 234 CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234 """, ) def test_xml_21500_tblastn_001_writer(self): """Writing TBLASTN 2.15.0+ (xml_21500_tblastn_001.xml).""" filename = "xml_21500_tblastn_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_tblastn_001_records(written_records) def test_xml2_21500_tblastn_001_writer(self): """Writing TBLASTN 2.15.0+ XML2 (xml2_21500_tblastn_001.xml).""" filename = "xml2_21500_tblastn_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream, fmt="XML2") self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_tblastn_001_records(written_records, xml2=True) class TestTBlastx(unittest.TestCase): """Test the Blast XML parser for tblastx output.""" def test_xml_2226_tblastx_004(self): """Parsing TBLASTX 2.2.26+ (xml_2226_tblastx_004.xml).""" filename = "xml_2226_tblastx_004.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_2226_tblastx_004(records) with Blast.parse(path) as records: self.check_xml_2226_tblastx_004(records) with Blast.parse(path) as records: self.assertEqual( str(records), """\ Program: TBLASTX 2.2.26+ db: refseq_rna Query: Query_1 (length=128) random_s00 Hits: No hits found Query: Query_2 (length=350) gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 7 gi|296147483|ref|NM_001183135.1| Saccharomyces cerevis... 1 6 gi|365982352|ref|XM_003667962.1| Naumovozyma dairenens... 2 4 gi|366988334|ref|XM_003673886.1| Naumovozyma castellii... 3 2 gi|255710474|ref|XM_002551475.1| Lachancea thermotoler... 4 4 gi|254579534|ref|XM_002495708.1| Zygosaccharomyces rou...""", ) def check_xml_2226_tblastx_004(self, records): self.assertEqual(records.program, "tblastx") self.assertEqual(records.version, "TBLASTX 2.2.26+") self.assertEqual( records.reference, 'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.', ) self.assertEqual(records.db, "refseq_rna") self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "Query_1") self.assertEqual(records.query.description, "random_s00") self.assertEqual(repr(records.query.seq), "Seq(None, length=128)") self.assertEqual(len(records.param), 5) self.assertEqual(records.param["matrix"], "BLOSUM62") self.assertAlmostEqual(records.param["expect"], 10.0) self.assertEqual(records.param["gap-open"], 11) self.assertEqual(records.param["gap-extend"], 1) self.assertEqual(records.param["filter"], "L;") record = next(records) self.assertEqual(record.num, 1) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "Query_1") self.assertEqual(record.query.description, "random_s00") self.assertEqual(repr(record.query.seq), "Seq(None, length=128)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 2933984) self.assertEqual(record.stat["db-len"], 4726730735) self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 0) self.assertAlmostEqual(record.stat["kappa"], 0.0) self.assertAlmostEqual(record.stat["lambda"], 0.0) self.assertAlmostEqual(record.stat["entropy"], 0.0) self.assertEqual(len(record), 0) record = next(records) self.assertEqual(record.num, 2) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "Query_2") self.assertEqual( record.query.description, "gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=350)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 2933984) self.assertEqual(record.stat["db-len"], 4726730735) self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 0) self.assertAlmostEqual(record.stat["kappa"], 0.0) self.assertAlmostEqual(record.stat["lambda"], 0.0) self.assertAlmostEqual(record.stat["entropy"], 0.0) self.assertEqual(len(record), 5) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|296147483|ref|NM_001183135.1|") self.assertEqual(hit.target.name, "NM_001183135") self.assertEqual( hit.target.description, "Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds >gi|116616412|gb|EF059095.1| Synthetic construct Saccharomyces cerevisiae clone FLH203015.01X MON2, complete sequence", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=4911)") self.assertEqual(len(hit), 7) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 626.0) self.assertAlmostEqual(hsp.annotations["bit score"], 289.739) self.assertAlmostEqual(hsp.annotations["evalue"], 1.05874e-76, places=81) self.assertEqual(hsp.annotations["identity"], 116) self.assertEqual(hsp.annotations["positive"], 116) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 602.0) self.assertAlmostEqual(hsp.annotations["bit score"], 278.742) self.assertAlmostEqual(hsp.annotations["evalue"], 2.16381e-73, places=78) self.assertEqual(hsp.annotations["identity"], 116) self.assertEqual(hsp.annotations["positive"], 116) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[2] self.assertEqual(hsp.num, 3) self.assertAlmostEqual(hsp.score, 593.0) self.assertAlmostEqual(hsp.annotations["bit score"], 274.618) self.assertAlmostEqual(hsp.annotations["evalue"], 3.77251e-72, places=77) self.assertEqual(hsp.annotations["identity"], 116) self.assertEqual(hsp.annotations["positive"], 116) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[3] self.assertEqual(hsp.num, 4) self.assertAlmostEqual(hsp.score, 583.0) self.assertAlmostEqual(hsp.annotations["bit score"], 270.036) self.assertAlmostEqual(hsp.annotations["evalue"], 9.03598e-71, places=76) self.assertEqual(hsp.annotations["identity"], 116) self.assertEqual(hsp.annotations["positive"], 116) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[4] self.assertEqual(hsp.num, 5) self.assertAlmostEqual(hsp.score, 495.0) self.assertAlmostEqual(hsp.annotations["bit score"], 229.713) self.assertAlmostEqual(hsp.annotations["evalue"], 1.24226e-58, places=63) self.assertEqual(hsp.annotations["identity"], 116) self.assertEqual(hsp.annotations["positive"], 116) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[5] self.assertEqual(hsp.num, 6) self.assertAlmostEqual(hsp.score, 425.0) self.assertAlmostEqual(hsp.annotations["bit score"], 197.639) self.assertAlmostEqual(hsp.annotations["evalue"], 9.12288e-54, places=59) self.assertEqual(hsp.annotations["identity"], 85) self.assertEqual(hsp.annotations["positive"], 85) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[6] self.assertEqual(hsp.num, 7) self.assertAlmostEqual(hsp.score, 73.0) self.assertAlmostEqual(hsp.annotations["bit score"], 36.3494) self.assertAlmostEqual(hsp.annotations["evalue"], 9.12288e-54, places=59) self.assertEqual(hsp.annotations["identity"], 14) self.assertEqual(hsp.annotations["positive"], 14) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 14], [ 0, 14]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 14)) self.assertEqual(repr(hsp.query.seq), "Seq('MAMNTGGFDSMQRQ')") self.assertEqual(hsp.query.id, "Query_2") self.assertEqual( hsp.query.description, "gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(14))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "Query_2:1..42") self.assertEqual(repr(hsp.target.seq), "Seq('MAMNTGGFDSMQRQ')") self.assertEqual(hsp.target.id, "gi|296147483|ref|NM_001183135.1|") self.assertEqual(hsp.target.name, "NM_001183135") self.assertEqual( hsp.target.description, "Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds >gi|116616412|gb|EF059095.1| Synthetic construct Saccharomyces cerevisiae clone FLH203015.01X MON2, complete sequence", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(4911))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "gi|296147483|ref|NM_001183135.1|:1..42" ) self.assertEqual(hsp.annotations["midline"], "MAMNTGGFDSMQRQ") self.assertEqual( str(hsp), """\ Query : Query_2 Length: 14 Strand: Plus gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds Target: gi|296147483|ref|NM_001183135.1| Length: 14 Strand: Plus Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds >gi|116616412|gb|EF059095.1| Synthetic construct Saccharomyces cerevisiae clone FLH203015.01X MON2, complete sequence Score:36 bits(73), Expect:9e-54, Identities:14/14(100%), Positives:14/14(100%), Gaps:0.14(0%) gi|296147 0 MAMNTGGFDSMQRQ 14 0 |||||||||||||| 14 Query_2 0 MAMNTGGFDSMQRQ 14 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|365982352|ref|XM_003667962.1|") self.assertEqual(hit.target.name, "XM_003667962") self.assertEqual( hit.target.description, "Naumovozyma dairenensis CBS 421 hypothetical protein (NDAI0A06120), mRNA", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=4932)") self.assertEqual(len(hit), 6) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 327.0) self.assertAlmostEqual(hsp.annotations["bit score"], 152.734) self.assertAlmostEqual(hsp.annotations["evalue"], 2.38069e-37, places=42) self.assertEqual(hsp.annotations["identity"], 62) self.assertEqual(hsp.annotations["positive"], 73) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 51.0) self.assertAlmostEqual(hsp.annotations["bit score"], 26.2688) self.assertAlmostEqual(hsp.annotations["evalue"], 2.38069e-37, places=42) self.assertEqual(hsp.annotations["identity"], 11) self.assertEqual(hsp.annotations["positive"], 11) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[2] self.assertEqual(hsp.num, 3) self.assertAlmostEqual(hsp.score, 142.0) self.assertAlmostEqual(hsp.annotations["bit score"], 67.9658) self.assertAlmostEqual(hsp.annotations["evalue"], 4.80116e-20, places=25) self.assertEqual(hsp.annotations["identity"], 34) self.assertEqual(hsp.annotations["positive"], 38) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[3] self.assertEqual(hsp.num, 4) self.assertAlmostEqual(hsp.score, 109.0) self.assertAlmostEqual(hsp.annotations["bit score"], 52.8449) self.assertAlmostEqual(hsp.annotations["evalue"], 4.80116e-20, places=25) self.assertEqual(hsp.annotations["identity"], 24) self.assertEqual(hsp.annotations["positive"], 29) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[4] self.assertEqual(hsp.num, 5) self.assertAlmostEqual(hsp.score, 127.0) self.assertAlmostEqual(hsp.annotations["bit score"], 61.0927) self.assertAlmostEqual(hsp.annotations["evalue"], 7.14684e-08, places=13) self.assertEqual(hsp.annotations["identity"], 36) self.assertEqual(hsp.annotations["positive"], 52) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[5] self.assertEqual(hsp.num, 6) self.assertAlmostEqual(hsp.score, 87.0) self.assertAlmostEqual(hsp.annotations["bit score"], 42.7643) self.assertAlmostEqual(hsp.annotations["evalue"], 0.0235231) self.assertEqual(hsp.annotations["identity"], 28) self.assertEqual(hsp.annotations["positive"], 36) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 65], [ 0, 65]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 65)) self.assertEqual( repr(hsp.query.seq), "Seq('G*QSL*ALHCQGRHFSIP*LASQHERECEIRMSF*LLKTMYSFQYLNGFITSMA...FGR')", ) self.assertEqual(hsp.query.id, "Query_2") self.assertEqual( hsp.query.description, "gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(65))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "complement(Query_2:67..261)") self.assertEqual( repr(hsp.target.seq), "Seq('GRQSL*ALHGN*S*FGISR*TGQDQRCNKIRVPYKFFYILNSL*NID*FVASMF...F*R')", ) self.assertEqual(hsp.target.id, "gi|365982352|ref|XM_003667962.1|") self.assertEqual(hsp.target.name, "XM_003667962") self.assertEqual( hsp.target.description, "Naumovozyma dairenensis CBS 421 hypothetical protein (NDAI0A06120), mRNA", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(4932))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "complement(gi|365982352|ref|XM_003667962.1|:61..255)", ) self.assertEqual( hsp.annotations["midline"], "G QSL*ALH F I Q +R +IR+ + + S ++ F+ SM NG*ISSFRF R", ) self.assertEqual( str(hsp), """\ Query : Query_2 Length: 65 Strand: Plus gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds Target: gi|365982352|ref|XM_003667962.1| Length: 65 Strand: Plus Naumovozyma dairenensis CBS 421 hypothetical protein (NDAI0A06120), mRNA Score:42 bits(87), Expect:0.02, Identities:28/65(43%), Positives:36/65(55%), Gaps:0.65(0%) gi|365982 0 GRQSL*ALHGN*S*FGISR*TGQDQRCNKIRVPYKFFYILNSL*NID*FVASMFNG*ISS 0 |.|||||||.....|.|.....|..|...||..........|......|..||.|||||| Query_2 0 G*QSL*ALHCQGRHFSIP*LASQHERECEIRMSF*LLKTMYSFQYLNGFITSMANG*ISS gi|365982 60 FRF*R 65 60 |||.| 65 Query_2 60 FRFGR 65 """, ) hit = record[2] self.assertEqual(hit.num, 3) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|366988334|ref|XM_003673886.1|") self.assertEqual(hit.target.name, "XM_003673886") self.assertEqual( hit.target.description, "Naumovozyma castellii CBS 4309 hypothetical protein (NCAS0A09950) mRNA, complete cds", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=4938)") self.assertEqual(len(hit), 4) self.assertEqual( repr(hit), "", ) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 306.0) self.assertAlmostEqual(hsp.annotations["bit score"], 143.112) self.assertAlmostEqual(hsp.annotations["evalue"], 1.45826e-32, places=37) self.assertEqual(hsp.annotations["identity"], 58) self.assertEqual(hsp.annotations["positive"], 71) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 130.0) self.assertAlmostEqual(hsp.annotations["bit score"], 62.4673) self.assertAlmostEqual(hsp.annotations["evalue"], 5.61057e-16, places=21) self.assertEqual(hsp.annotations["identity"], 30) self.assertEqual(hsp.annotations["positive"], 36) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[2] self.assertEqual(hsp.num, 3) self.assertAlmostEqual(hsp.score, 91.0) self.assertAlmostEqual(hsp.annotations["bit score"], 44.5971) self.assertAlmostEqual(hsp.annotations["evalue"], 5.61057e-16, places=21) self.assertEqual(hsp.annotations["identity"], 20) self.assertEqual(hsp.annotations["positive"], 24) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[3] self.assertEqual(hsp.num, 4) self.assertAlmostEqual(hsp.score, 112.0) self.assertAlmostEqual(hsp.annotations["bit score"], 54.2195) self.assertAlmostEqual(hsp.annotations["evalue"], 8.37784e-06, places=11) self.assertEqual(hsp.annotations["identity"], 38) self.assertEqual(hsp.annotations["positive"], 58) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 115], [ 0, 115]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 115)) self.assertEqual( repr(hsp.query.seq), "Seq('*LNLHREMSSLNEGIQNFRQPASRNRWNG*QSL*ALHCQGRHFSIP*LASQHER...HGH')", ) self.assertEqual(hsp.query.id, "Query_2") self.assertEqual( hsp.query.description, "gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(115))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "complement(Query_2:1..345)") self.assertEqual( repr(hsp.target.seq), "Seq('*LYFNCKMCRFYKSIENSR*SAFRYGWCRC*AL*TLHSNRGQLSVP**T*FYQW...NSH')", ) self.assertEqual(hsp.target.id, "gi|366988334|ref|XM_003673886.1|") self.assertEqual(hsp.target.name, "XM_003673886") self.assertEqual( hsp.target.description, "Naumovozyma castellii CBS 4309 hypothetical protein (NCAS0A09950) mRNA, complete cds", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(4938))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "complement(gi|366988334|ref|XM_003673886.1|:1..345)", ) self.assertEqual( hsp.annotations["midline"], "*L + +M + I+N R A R W +L* LH S+P* ++ +IRM+ + + SF L+ +T M + + SFRFGR QF +L L G+K S++ H", ) self.assertEqual( str(hsp), """\ Query : Query_2 Length: 115 Strand: Plus gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds Target: gi|366988334|ref|XM_003673886.1| Length: 115 Strand: Plus Naumovozyma castellii CBS 4309 hypothetical protein (NCAS0A09950) mRNA, complete cds Score:54 bits(112), Expect:8e-06, Identities:38/115(33%), Positives:58/115(50%), Gaps:0.115(0%) gi|366988 0 *LYFNCKMCRFYKSIENSR*SAFRYGWCRC*AL*TLHSNRGQLSVP**T*FYQWSDKIRM 0 ||.....|......|.|.|..|.|..|.....||.||......|.||..........||| Query_2 0 *LNLHREMSSLNEGIQNFRQPASRNRWNG*QSL*ALHCQGRHFSIP*LASQHERECEIRM gi|366988 60 ACQIFNVLDSF*DLDRLVTCMLDRSVPSFRFGRQ*MQF*VQLFLKGLKTCSLNSH 115 60 .........||..|....|.|......||||||...||...|.|.|.|..|...| 115 Query_2 60 SF*LLKTMYSFQYLNGFITSMANG*ISSFRFGR*RTQFCFKLPLHGVKPSSVHGH 115 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|255710474|ref|XM_002551475.1|") self.assertEqual(hit.target.name, "XM_002551475") self.assertEqual( hit.target.description, "Lachancea thermotolerans CBS 6340 KLTH0A01342p (KLTH0A01342g) mRNA, complete cds", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=4845)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 303.0) self.assertAlmostEqual(hsp.annotations["bit score"], 141.737) self.assertAlmostEqual(hsp.annotations["evalue"], 3.78129e-32, places=37) self.assertEqual(hsp.annotations["identity"], 55) self.assertEqual(hsp.annotations["positive"], 71) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 92.0) self.assertAlmostEqual(hsp.annotations["bit score"], 45.0554) self.assertAlmostEqual(hsp.annotations["evalue"], 0.00480643) self.assertEqual(hsp.annotations["identity"], 25) self.assertEqual(hsp.annotations["positive"], 29) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 55], [ 0, 55]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 55)) self.assertEqual( repr(hsp.query.seq), "Seq('TFN*ISIAR*VASMKASKISDSRLRGIDGTVDSPCRHCIARVVILAFLDWQANTK')", ) self.assertEqual(hsp.query.id, "Query_2") self.assertEqual( hsp.query.description, "gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(55))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "complement(Query_2:186..350)") self.assertEqual( repr(hsp.target.seq), "Seq('TLSCISAARWVESMNASSTSSILSSGMQLTDDIFCRHCTETLVSLAFLEAHERTK')", ) self.assertEqual(hsp.target.id, "gi|255710474|ref|XM_002551475.1|") self.assertEqual(hsp.target.name, "XM_002551475") self.assertEqual( hsp.target.description, "Lachancea thermotolerans CBS 6340 KLTH0A01342p (KLTH0A01342g) mRNA, complete cds", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(4845))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "complement(gi|255710474|ref|XM_002551475.1|:183..347)", ) self.assertEqual( hsp.annotations["midline"], "T + IS AR V SM AS S G+ T D CRHC +V LAFL+ TK", ) self.assertEqual( str(hsp), """\ Query : Query_2 Length: 55 Strand: Plus gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds Target: gi|255710474|ref|XM_002551475.1| Length: 55 Strand: Plus Lachancea thermotolerans CBS 6340 KLTH0A01342p (KLTH0A01342g) mRNA, complete cds Score:45 bits(92), Expect:0.005, Identities:25/55(45%), Positives:29/55(53%), Gaps:0.55(0%) gi|255710 0 TLSCISAARWVESMNASSTSSILSSGMQLTDDIFCRHCTETLVSLAFLEAHERTK 55 0 |...||.||.|.||.||..|.....|...|.|..||||....|.||||.....|| 55 Query_2 0 TFN*ISIAR*VASMKASKISDSRLRGIDGTVDSPCRHCIARVVILAFLDWQANTK 55 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|254579534|ref|XM_002495708.1|") self.assertEqual(hit.target.name, "XM_002495708") self.assertEqual( hit.target.description, "Zygosaccharomyces rouxii hypothetical protein (ZYRO0C02266g) mRNA, complete cds", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=4866)") self.assertEqual(len(hit), 4) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 302.0) self.assertAlmostEqual(hsp.annotations["bit score"], 141.279) self.assertAlmostEqual(hsp.annotations["evalue"], 5.19486e-32, places=37) self.assertEqual(hsp.annotations["identity"], 57) self.assertEqual(hsp.annotations["positive"], 72) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 105.0) self.assertAlmostEqual(hsp.annotations["bit score"], 51.0121) self.assertAlmostEqual(hsp.annotations["evalue"], 8.66978e-12, places=17) self.assertEqual(hsp.annotations["identity"], 27) self.assertEqual(hsp.annotations["positive"], 33) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[2] self.assertEqual(hsp.num, 3) self.assertAlmostEqual(hsp.score, 85.0) self.assertAlmostEqual(hsp.annotations["bit score"], 41.8479) self.assertAlmostEqual(hsp.annotations["evalue"], 8.66978e-12, places=17) self.assertEqual(hsp.annotations["identity"], 20) self.assertEqual(hsp.annotations["positive"], 25) self.assertEqual(hsp.annotations["gaps"], 0) hsp = hit[3] self.assertEqual(hsp.num, 4) self.assertAlmostEqual(hsp.score, 92.0) self.assertAlmostEqual(hsp.annotations["bit score"], 45.0554) self.assertAlmostEqual(hsp.annotations["evalue"], 0.00480643) self.assertEqual(hsp.annotations["identity"], 31) self.assertEqual(hsp.annotations["positive"], 53) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 99], [ 0, 99]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 99)) self.assertEqual( repr(hsp.query.seq), "Seq('RIAFFIFRIEKKKFNHSPC***IH*DIEKST*F*GARKTSGFRTPFRVGLPIKE...SIK')", ) self.assertEqual(hsp.query.id, "Query_2") self.assertEqual( hsp.query.description, "gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(99))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual(feature.qualifiers["coded_by"], "Query_2:51..347") self.assertEqual( repr(hsp.target.seq), "Seq('RVAFTFIRVEEKEYCY*EC*R*IN*NFESSSQL*GIIKTSRFYSTASDVMCIQE...KTK')", ) self.assertEqual(hsp.target.id, "gi|254579534|ref|XM_002495708.1|") self.assertEqual(hsp.target.name, "XM_002495708") self.assertEqual( hsp.target.description, "Zygosaccharomyces rouxii hypothetical protein (ZYRO0C02266g) mRNA, complete cds", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual( repr(feature.location), "SimpleLocation(ExactPosition(0), ExactPosition(4866))", ) self.assertEqual(feature.type, "CDS") self.assertEqual(len(feature.qualifiers), 1) self.assertEqual( feature.qualifiers["coded_by"], "gi|254579534|ref|XM_002495708.1|:51..347" ) self.assertEqual( hsp.annotations["midline"], "R+AF R+E+K++ + C* *I+*+ E S+ *G KTS F + + I+EC D NAM + ++Y+ + + C++ G + +G K", ) self.assertEqual( str(hsp), """\ Query : Query_2 Length: 99 Strand: Plus gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds Target: gi|254579534|ref|XM_002495708.1| Length: 99 Strand: Plus Zygosaccharomyces rouxii hypothetical protein (ZYRO0C02266g) mRNA, complete cds Score:45 bits(92), Expect:0.005, Identities:31/99(31%), Positives:53/99(54%), Gaps:0.99(0%) gi|254579 0 RVAFTFIRVEEKEYCY*EC*R*IN*NFESSSQL*GIIKTSRFYSTASDVMCIQECQIDYY 0 |.||...|.|.|......||.||.|..|.|...||..|||.|.........|.||..|.. Query_2 0 RIAFFIFRIEKKKFNHSPC***IH*DIEKST*F*GARKTSGFRTPFRVGLPIKEC*NDDP gi|254579 60 INAMFPKIGHSAMYTGR*TLRRT*CVYRGQPAGNGYKTK 99 60 .|||.........|..........|...|.....|...| 99 Query_2 60 GNAMPTGTVNRSIYSSKPAV*NFGCLH*GYSSRDGDSIK 99 """, ) hit = record[0] self.assertEqual(hit.num, 1) hsps = hit[1:5:2] self.assertEqual( str(hsps), """\ Query: Query_2 gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds Hit: gi|296147483|ref|NM_001183135.1| (length=4911) Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds >gi|116616412|gb|EF059095.1| Synthetic construct Saccharomyces cerevisiae clone FLH203015.01X MON2, complete sequence HSPs: ---- -------- --------- ------ --------------- --------------------- # E-value Bit score Span Query range Hit range ---- -------- --------- ------ --------------- --------------------- 0 2.2e-73 278.74 116 [0:116] [0:116] 1 9e-71 270.04 116 [0:116] [0:116]""", ) def test_xml_2226_tblastx_004_writer(self): """Writing TBLASTX 2.2.26+ (xml_2226_tblastx_004.xml).""" filename = "xml_2226_tblastx_004.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 2) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_2226_tblastx_004(written_records) def test_xml_21500_tblastx_001_parser(self): """Parsing TBLASTX 2.15.0+ (xml_21500_tblastx_001.xml).""" filename = "xml_21500_tblastx_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_tblastx_001_records(records) with Blast.parse(path) as records: self.check_xml_21500_tblastx_001_records(records) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_tblastx_001_record(record, xml2=False) record = Blast.read(path) self.check_xml_21500_tblastx_001_record(record, xml2=False) with Blast.parse(path) as records: self.assertEqual( str(records), """\ Program: TBLASTX 2.15.0+ db: refseq_rna Query: Query_949527 (length=804) NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome Hits: ---- ----- ---------------------------------------------------------- # # HSP ID + description ---- ----- ---------------------------------------------------------- 0 1 gi|1048027041|ref|XM_017618728.1| PREDICTED: Rhagoleti... 1 2 gi|2670929493|ref|XM_062937224.1| Kwoniella shivajii u... 2 1 gi|2432161024|ref|XM_053088494.1| Dioszegia hungarica ... 3 1 gi|799335188|ref|XM_012195534.1| Cryptococcus neoforma... 4 1 gi|1799711371|ref|XM_032001855.1| Kwoniella shandongen... 5 1 gi|1102541390|ref|XM_019193349.1| Kwoniella bestiolae ... 6 1 gi|1799711369|ref|XM_032001854.1| Kwoniella shandongen... 7 1 gi|2592096353|ref|XM_060229193.1| PREDICTED: Ylistrum ... 8 1 gi|2044197324|ref|XM_041762518.1| PREDICTED: Vulpes la... 9 1 gi|1101784196|ref|XM_019135081.1| Cryptococcus amylole...""", ) def test_xml2_21500_tblastx_001_parser(self): """Parsing TBLASTX 2.15.0+ (xml2_21500_tblastx_001.xml).""" filename = "xml2_21500_tblastx_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_tblastx_001_records(records, xml2=True) with Blast.parse(path) as records: self.check_xml_21500_tblastx_001_records(records, xml2=True) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_tblastx_001_record(record, xml2=True) record = Blast.read(path) self.check_xml_21500_tblastx_001_record(record, xml2=True) def check_xml_21500_tblastx_001_records(self, records, xml2=False): self.assertEqual(records.program, "tblastx") self.assertEqual(records.version, "TBLASTX 2.15.0+") self.assertEqual( records.reference, 'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.', ) self.assertEqual(records.db, "refseq_rna") if not xml2: self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "Query_949527") self.assertEqual( records.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(repr(records.query.seq), "Seq(None, length=804)") self.assertEqual(len(records.param), 5) self.assertEqual(records.param["matrix"], "BLOSUM62") self.assertAlmostEqual(records.param["expect"], 0.05) self.assertEqual(records.param["filter"], "L;") if xml2: self.assertEqual(records.param["query-gencode"], 1) self.assertEqual(records.param["db-gencode"], 1) else: self.assertEqual(records.param["gap-open"], 11) self.assertEqual(records.param["gap-extend"], 1) record = next(records) self.assertRaises(StopIteration, next, records) self.check_xml_21500_tblastx_001_record(record, xml2) def check_xml_21500_tblastx_001_record(self, record, xml2): if not xml2: self.assertEqual(record.num, 1) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "Query_949527") self.assertEqual( record.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=804)") if xml2: self.assertEqual(len(record.query.features), 1) feature = record.query.features[0] self.assertEqual(feature.type, "masking") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(259), ExactPosition(293))" ) self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 60837734) self.assertEqual(record.stat["db-len"], 161330429008) if xml2: self.assertEqual(record.stat["hsp-len"], 66) self.assertEqual(record.stat["eff-space"], 10051826883450) self.assertEqual(record.stat["kappa"], -1) self.assertEqual(record.stat["lambda"], -1) self.assertEqual(record.stat["entropy"], -1) else: self.assertEqual(record.stat["hsp-len"], 0) self.assertEqual(record.stat["eff-space"], 0) self.assertAlmostEqual(record.stat["kappa"], 0.133956144488482) self.assertAlmostEqual(record.stat["lambda"], 0.317605957635731) self.assertAlmostEqual(record.stat["entropy"], 0.401214524497119) self.assertEqual(len(record), 10) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|1048027041|ref|XM_017618728.1|") self.assertEqual(hit.target.name, "XM_017618728") self.assertEqual( hit.target.description, "PREDICTED: Rhagoletis zephyria response regulator PleD-like (LOC108364862), partial mRNA", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=917)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 150.0) self.assertAlmostEqual(hsp.annotations["bit score"], 71.6314) self.assertAlmostEqual(hsp.annotations["evalue"], 5.49617e-09, places=14) self.assertEqual(hsp.annotations["identity"], 29) self.assertEqual(hsp.annotations["positive"], 52) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 89], [ 0, 89]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 89)) self.assertEqual( repr(hsp.query.seq), "Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...QVS')", ) self.assertEqual(hsp.query.id, "Query_949527") self.assertEqual( hsp.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(89))" ) self.assertEqual( repr(hsp.target.seq), "Seq('ACGGQEALNLVVEQQPDIVLLDVMMPGIDGFMVCKEMKDNPMTTHIPVVMVTAL...SLT')", ) self.assertEqual(hsp.target.id, "gi|1048027041|ref|XM_017618728.1|") self.assertEqual(hsp.target.name, "XM_017618728") self.assertEqual( hsp.target.description, "PREDICTED: Rhagoletis zephyria response regulator PleD-like (LOC108364862), partial mRNA", ) if xml2: self.assertEqual(len(record.query.features), 1) feature = record.query.features[0] self.assertEqual(feature.type, "masking") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(259), ExactPosition(293))" ) self.assertEqual( hsp.annotations["midline"], "A GQE L+L E+ PD+++LD++MP +DG V + ++++ + V+M+TA + K ++ GA F+ KP D L I+ ++", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_949527 Length: 89 Strand: Plus NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome Target: gi|1048027041|ref|XM_017618728.1| Length: 89 Strand: Plus PREDICTED: Rhagoletis zephyria response regulator PleD-like (LOC108364862), partial mRNA Score:71 bits(150), Expect:5e-09, Identities:29/89(33%), Positives:52/89(58%), Gaps:0.89(0%) gi|104802 0 ACGGQEALNLVVEQQPDIVLLDVMMPGIDGFMVCKEMKDNPMTTHIPVVMVTALHDTEDR 0 |..|||.|.|..|..||...||..||..||..|..............|.|.||....... Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT gi|104802 60 VKGINAGADDFLTKPIDETALSARIKSLT 89 60 .|....||..|..||.|...|...|.... 89 Query_949 60 KKAVDLGASYFILKPFDMENLVGHIRQVS 89 """, ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|2670929493|ref|XM_062937224.1|") self.assertEqual(hit.target.name, "XM_062937224") self.assertEqual( hit.target.description, "Kwoniella shivajii uncharacterized protein (IL334_005512), partial mRNA", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=9882)") self.assertEqual(len(hit), 2) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 146.0) self.assertAlmostEqual(hsp.annotations["bit score"], 69.7986) self.assertAlmostEqual(hsp.annotations["evalue"], 1.95794e-08, places=13) self.assertEqual(hsp.annotations["identity"], 32) self.assertEqual(hsp.annotations["positive"], 51) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 94], [ 0, 94]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 94)) self.assertEqual( repr(hsp.query.seq), "Seq('IEGQEDMEVIGVAYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDL...NLV')", ) self.assertEqual(hsp.query.id, "Query_949527") self.assertEqual( hsp.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(94))" ) self.assertEqual( repr(hsp.target.seq), "Seq('VDDSYDMRKVIEAHDGQEALELCSRALPDLIISDIMMPRLDGFGLLQALKSSSN...ELL')", ) self.assertEqual(hsp.target.id, "gi|2670929493|ref|XM_062937224.1|") self.assertEqual(hsp.target.name, "XM_062937224") self.assertEqual( hsp.target.description, "Kwoniella shivajii uncharacterized protein (IL334_005512), partial mRNA", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(9882))" ) self.assertEqual( hsp.annotations["midline"], "++ DM + A++GQE L L PD+++ DI+MP LDG +L+ L+ S +I+LTA G +D + GA ++ KPF L+", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_949527 Length: 94 Strand: Plus NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome Target: gi|2670929493|ref|XM_062937224.1| Length: 94 Strand: Plus Kwoniella shivajii uncharacterized protein (IL334_005512), partial mRNA Score:69 bits(146), Expect:2e-08, Identities:32/94(34%), Positives:51/94(54%), Gaps:0.94(0%) gi|267092 0 VDDSYDMRKVIEAHDGQEALELCSRALPDLIISDIMMPRLDGFGLLQALKSSSNLISVPI 0 .....||.....|..|||.|.|.....||....||.||.|||...|..|..|........ Query_949 0 IEGQEDMEVIGVAYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNV gi|267092 60 ILLTARGDQDFKVGGLMSGAEDYLSKPFSTPELL 94 60 |.|||.|..|........||.....|||....|. 94 Query_949 60 IMLTAFGQEDVTKKAVDLGASYFILKPFDMENLV 94 """, ) hsp = hit[1] self.assertEqual(hsp.num, 2) self.assertAlmostEqual(hsp.score, 118.0) self.assertAlmostEqual(hsp.annotations["bit score"], 56.9688) self.assertAlmostEqual(hsp.annotations["evalue"], 0.000142549) self.assertEqual(hsp.annotations["identity"], 25) self.assertEqual(hsp.annotations["positive"], 42) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 68], [ 0, 68]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 68)) self.assertEqual( repr(hsp.query.seq), "Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...LGA')", ) self.assertEqual(hsp.query.id, "Query_949527") self.assertEqual( hsp.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(68))" ) self.assertEqual( repr(hsp.target.seq), "Seq('ARDGQEALALCAQQAPDLIISDVMMPNLDGFGLLRALKQSKKLAIIPIIMLTAR...AGA')", ) self.assertEqual(hsp.target.id, "gi|2670929493|ref|XM_062937224.1|") self.assertEqual(hsp.target.name, "XM_062937224") self.assertEqual( hsp.target.description, "Kwoniella shivajii uncharacterized protein (IL334_005512), partial mRNA", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(9882))" ) self.assertEqual( hsp.annotations["midline"], "A +GQE L+L ++ PD+++ D++MP+LDG +L L++S +IMLTA G ++ + GA", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_949527 Length: 68 Strand: Plus NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome Target: gi|2670929493|ref|XM_062937224.1| Length: 68 Strand: Plus Kwoniella shivajii uncharacterized protein (IL334_005512), partial mRNA Score:56 bits(118), Expect:0.0001, Identities:25/68(37%), Positives:42/68(62%), Gaps:0.68(0%) gi|267092 0 ARDGQEALALCAQQAPDLIISDVMMPNLDGFGLLRALKQSKKLAIIPIIMLTARGGDEAR 0 |..|||.|.|.....||....|..||.|||...|..|..|........|||||.|..... Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT gi|267092 60 VDGILAGA 68 60 ......|| 68 Query_949 60 KKAVDLGA 68 """, ) hit = record[2] self.assertEqual(hit.num, 3) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|2432161024|ref|XM_053088494.1|") self.assertEqual(hit.target.name, "XM_053088494") self.assertEqual( hit.target.description, "Dioszegia hungarica CnHHK4 (MKK02DRAFT_32386), partial mRNA", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=5601)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 146.0) self.assertAlmostEqual(hsp.annotations["bit score"], 69.7986) self.assertAlmostEqual(hsp.annotations["evalue"], 1.95794e-08, places=13) self.assertEqual(hsp.annotations["identity"], 32) self.assertEqual(hsp.annotations["positive"], 47) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 82], [ 0, 82]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 82)) self.assertEqual( repr(hsp.query.seq), "Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...NLV')", ) self.assertEqual(hsp.query.id, "Query_949527") self.assertEqual( hsp.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(82))" ) self.assertEqual( repr(hsp.target.seq), "Seq('ARDGQEALEMCKKSLPHVVITDVMMPNLDGFGLLAALKEDPKLSMVPVIMLTAR...ELI')", ) self.assertEqual(hsp.target.id, "gi|2432161024|ref|XM_053088494.1|") self.assertEqual(hsp.target.name, "XM_053088494") self.assertEqual( hsp.target.description, "Dioszegia hungarica CnHHK4 (MKK02DRAFT_32386), partial mRNA", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(5601))" ) self.assertEqual( hsp.annotations["midline"], "A +GQE L + K+ P V++ D++MP+LDG +L L+E VIMLTA G E+ + GA +I KPF+ L+", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_949527 Length: 82 Strand: Plus NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome Target: gi|2432161024|ref|XM_053088494.1| Length: 82 Strand: Plus Dioszegia hungarica CnHHK4 (MKK02DRAFT_32386), partial mRNA Score:69 bits(146), Expect:2e-08, Identities:32/82(39%), Positives:47/82(57%), Gaps:0.82(0%) gi|243216 0 ARDGQEALEMCKKSLPHVVITDVMMPNLDGFGLLAALKEDPKLSMVPVIMLTARGGEEAK 0 |..|||.|...|...|.|...|..||.|||...|..|.|........||||||.|.|... Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT gi|243216 60 VDGLLAGADDYIAKPFNARELI 82 60 ......||...|.|||....|. 82 Query_949 60 KKAVDLGASYFILKPFDMENLV 82 """, ) hit = record[3] self.assertEqual(hit.num, 4) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|799335188|ref|XM_012195534.1|") self.assertEqual(hit.target.name, "XM_012195534") self.assertEqual( hit.target.description, "Cryptococcus neoformans var. grubii H99 hypothetical protein (CNAG_03355), mRNA", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=5764)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 146.0) self.assertAlmostEqual(hsp.annotations["bit score"], 69.7986) self.assertAlmostEqual(hsp.annotations["evalue"], 1.95794e-08, places=13) self.assertEqual(hsp.annotations["identity"], 31) self.assertEqual(hsp.annotations["positive"], 48) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 82], [ 0, 82]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 82)) self.assertEqual( repr(hsp.query.seq), "Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...NLV')", ) self.assertEqual(hsp.query.id, "Query_949527") self.assertEqual( hsp.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(82))" ) self.assertEqual( repr(hsp.target.seq), "Seq('ARDGQEALELCQKSVPDLIISDVMMPHLNGFELLVALKRSKDLKMVPVIMLTAR...EIV')", ) self.assertEqual(hsp.target.id, "gi|799335188|ref|XM_012195534.1|") self.assertEqual(hsp.target.name, "XM_012195534") self.assertEqual( hsp.target.description, "Cryptococcus neoformans var. grubii H99 hypothetical protein (CNAG_03355), mRNA", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(5764))" ) self.assertEqual( hsp.annotations["midline"], "A +GQE L L ++ PD+++ D++MPHL+G +L L+ S K VIMLTA G ++ + GA ++ KPF +V", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_949527 Length: 82 Strand: Plus NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome Target: gi|799335188|ref|XM_012195534.1| Length: 82 Strand: Plus Cryptococcus neoformans var. grubii H99 hypothetical protein (CNAG_03355), mRNA Score:69 bits(146), Expect:2e-08, Identities:31/82(38%), Positives:48/82(59%), Gaps:0.82(0%) gi|799335 0 ARDGQEALELCQKSVPDLIISDVMMPHLNGFELLVALKRSKDLKMVPVIMLTARGADESK 0 |..|||.|.|.....||....|..||||.|...|..|..|...|...||||||.|..... Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT gi|799335 60 VDGIMAGAEDYLAKPFSAREIV 82 60 ......||.....|||.....| 82 Query_949 60 KKAVDLGASYFILKPFDMENLV 82 """, ) hit = record[4] self.assertEqual(hit.num, 5) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|1799711371|ref|XM_032001855.1|") self.assertEqual(hit.target.name, "XM_032001855") self.assertEqual( hit.target.description, "Kwoniella shandongensis uncharacterized protein (CI109_000662), partial mRNA", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=5538)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 145.0) self.assertAlmostEqual(hsp.annotations["bit score"], 69.3404) self.assertAlmostEqual(hsp.annotations["evalue"], 2.68988e-08, places=13) self.assertEqual(hsp.annotations["identity"], 31) self.assertEqual(hsp.annotations["positive"], 51) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 82], [ 0, 82]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 82)) self.assertEqual( repr(hsp.query.seq), "Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...NLV')", ) self.assertEqual(hsp.query.id, "Query_949527") self.assertEqual( hsp.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(82))" ) self.assertEqual( repr(hsp.target.seq), "Seq('ARDGVEALQLCKKQLPNLIITDVMMPNLDGFGLLAALKESRAMKVIPVIMLTAR...EIV')", ) self.assertEqual(hsp.target.id, "gi|1799711371|ref|XM_032001855.1|") self.assertEqual(hsp.target.name, "XM_032001855") self.assertEqual( hsp.target.description, "Kwoniella shandongensis uncharacterized protein (CI109_000662), partial mRNA", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(5538))" ) self.assertEqual( hsp.annotations["midline"], "A +G E L L K++ P++++ D++MP+LDG +L L+ES K VIMLTA G ++ + + GA ++ KPF+ +V", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_949527 Length: 82 Strand: Plus NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome Target: gi|1799711371|ref|XM_032001855.1| Length: 82 Strand: Plus Kwoniella shandongensis uncharacterized protein (CI109_000662), partial mRNA Score:69 bits(145), Expect:3e-08, Identities:31/82(38%), Positives:51/82(62%), Gaps:0.82(0%) gi|179971 0 ARDGVEALQLCKKQLPNLIITDVMMPNLDGFGLLAALKESRAMKVIPVIMLTARGGDESK 0 |..|.|.|.|.|...|.....|..||.|||...|..|.||...|...||||||.|..... Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT gi|179971 60 VEGILAGADDYLAKPFNAREIV 82 60 ......||.....|||.....| 82 Query_949 60 KKAVDLGASYFILKPFDMENLV 82 """, ) hit = record[5] self.assertEqual(hit.num, 6) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|1102541390|ref|XM_019193349.1|") self.assertEqual(hit.target.name, "XM_019193349") self.assertEqual( hit.target.description, "Kwoniella bestiolae CBS 10118 hypothetical protein partial mRNA", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=5319)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 144.0) self.assertAlmostEqual(hsp.annotations["bit score"], 68.8822) self.assertAlmostEqual(hsp.annotations["evalue"], 3.69545e-08, places=13) self.assertEqual(hsp.annotations["identity"], 32) self.assertEqual(hsp.annotations["positive"], 49) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 82], [ 0, 82]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 82)) self.assertEqual( repr(hsp.query.seq), "Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...NLV')", ) self.assertEqual(hsp.query.id, "Query_949527") self.assertEqual( hsp.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(82))" ) self.assertEqual( repr(hsp.target.seq), "Seq('ARDGQEALEMCGKKMPDLIISDVMMPNLDGFGLLEALKASKELSIIPVIMLTAR...ELV')", ) self.assertEqual(hsp.target.id, "gi|1102541390|ref|XM_019193349.1|") self.assertEqual(hsp.target.name, "XM_019193349") self.assertEqual( hsp.target.description, "Kwoniella bestiolae CBS 10118 hypothetical protein partial mRNA", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(5319))" ) self.assertEqual( hsp.annotations["midline"], "A +GQE L + +K PD+++ D++MP+LDG +LE L+ S VIMLTA G ++ + GA ++ KPF+ LV", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_949527 Length: 82 Strand: Plus NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome Target: gi|1102541390|ref|XM_019193349.1| Length: 82 Strand: Plus Kwoniella bestiolae CBS 10118 hypothetical protein partial mRNA Score:68 bits(144), Expect:4e-08, Identities:32/82(39%), Positives:49/82(60%), Gaps:0.82(0%) gi|110254 0 ARDGQEALEMCGKKMPDLIISDVMMPNLDGFGLLEALKASKELSIIPVIMLTARGGDEAK 0 |..|||.|.....|.||....|..||.|||...||.|..|.......||||||.|..... Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT gi|110254 60 VDGLLAGADDYLAKPFNSRELV 82 60 ......||.....|||....|| 82 Query_949 60 KKAVDLGASYFILKPFDMENLV 82 """, ) hit = record[6] self.assertEqual(hit.num, 7) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|1799711369|ref|XM_032001854.1|") self.assertEqual(hit.target.name, "XM_032001854") self.assertEqual( hit.target.description, "Kwoniella shandongensis uncharacterized protein (CI109_000661), partial mRNA", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=3591)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 143.0) self.assertAlmostEqual(hsp.annotations["bit score"], 68.424) self.assertAlmostEqual(hsp.annotations["evalue"], 5.07694e-08, places=13) self.assertEqual(hsp.annotations["identity"], 31) self.assertEqual(hsp.annotations["positive"], 53) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 86], [ 0, 86]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 86)) self.assertEqual( repr(hsp.query.seq), "Seq('VIGVAYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIM...NLV')", ) self.assertEqual(hsp.query.id, "Query_949527") self.assertEqual( hsp.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(86))" ) self.assertEqual( repr(hsp.target.seq), "Seq('VVVEARDGQEALDKCMQIRPDLIITDVMMPVLDGFGLLRALKQSDELKAIPVIM...ELI')", ) self.assertEqual(hsp.target.id, "gi|1799711369|ref|XM_032001854.1|") self.assertEqual(hsp.target.name, "XM_032001854") self.assertEqual( hsp.target.description, "Kwoniella shandongensis uncharacterized protein (CI109_000661), partial mRNA", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(3591))" ) self.assertEqual( hsp.annotations["midline"], "V+ A +GQE L + PD+++ D++MP LDG +L L++SD K VIM+TA ++ +A+ GA +++KPF++ L+", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_949527 Length: 86 Strand: Plus NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome Target: gi|1799711369|ref|XM_032001854.1| Length: 86 Strand: Plus Kwoniella shandongensis uncharacterized protein (CI109_000661), partial mRNA Score:68 bits(143), Expect:5e-08, Identities:31/86(36%), Positives:53/86(62%), Gaps:0.86(0%) gi|179971 0 VVVEARDGQEALDKCMQIRPDLIITDVMMPVLDGFGLLRALKQSDELKAIPVIMVTAHDG 0 |...|..|||.|.......||....|..||.|||...|..|..||..|...|||.||... Query_949 0 VIGVAYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQ gi|179971 60 DEAKVEALLGGADDYMVKPFNVRELI 86 60 ......|...||.....|||....|. 86 Query_949 60 EDVTKKAVDLGASYFILKPFDMENLV 86 """, ) hit = record[7] self.assertEqual(hit.num, 8) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|2592096353|ref|XM_060229193.1|") self.assertEqual(hit.target.name, "XM_060229193") self.assertEqual( hit.target.description, "PREDICTED: Ylistrum balloti signal transduction histidine-protein kinase BarA-like (LOC132564539), mRNA", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=1374)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 142.0) self.assertAlmostEqual(hsp.annotations["bit score"], 67.9658) self.assertAlmostEqual(hsp.annotations["evalue"], 6.97488e-08, places=13) self.assertEqual(hsp.annotations["identity"], 31) self.assertEqual(hsp.annotations["positive"], 51) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 84], [ 0, 84]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 84)) self.assertEqual( repr(hsp.query.seq), "Seq('IGVAYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIML...ENL')", ) self.assertEqual(hsp.query.id, "Query_949527") self.assertEqual( hsp.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(84))" ) self.assertEqual( repr(hsp.target.seq), "Seq('IHLASNGQEAIQKTKVLDPDLIFLDLHMPDMSGLEVIEILRSITAYRDTPIIIL...DKL')", ) self.assertEqual(hsp.target.id, "gi|2592096353|ref|XM_060229193.1|") self.assertEqual(hsp.target.name, "XM_060229193") self.assertEqual( hsp.target.description, "PREDICTED: Ylistrum balloti signal transduction histidine-protein kinase BarA-like (LOC132564539), mRNA", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(1374))" ) self.assertEqual( hsp.annotations["midline"], "I +A NGQE + K DPD++ LD+ MP + GL V+E LR + +I+L+A + +KA+ +GAS ++ KP +++ L", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_949527 Length: 84 Strand: Plus NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome Target: gi|2592096353|ref|XM_060229193.1| Length: 84 Strand: Plus PREDICTED: Ylistrum balloti signal transduction histidine-protein kinase BarA-like (LOC132564539), mRNA Score:67 bits(142), Expect:7e-08, Identities:31/84(37%), Positives:51/84(61%), Gaps:0.84(0%) gi|259209 0 IHLASNGQEAIQKTKVLDPDLIFLDLHMPDMSGLEVIEILRSITAYRDTPIIILSADAII 0 |..|.||||.....|..|||...||..||...||.|.|.||..........|.|.|.... Query_949 0 IGVAYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQE gi|259209 60 EQQEKALAVGASAYLTKPIEIDKL 84 60 ....||...|||....||.....| 84 Query_949 60 DVTKKAVDLGASYFILKPFDMENL 84 """, ) hit = record[8] self.assertEqual(hit.num, 9) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|2044197324|ref|XM_041762518.1|") self.assertEqual(hit.target.name, "XM_041762518") self.assertEqual( hit.target.description, "PREDICTED: Vulpes lagopus sensory transduction protein RegX3-like (LOC121495284), mRNA", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=681)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 142.0) self.assertAlmostEqual(hsp.annotations["bit score"], 67.9658) self.assertAlmostEqual(hsp.annotations["evalue"], 6.97488e-08, places=13) self.assertEqual(hsp.annotations["identity"], 30) self.assertEqual(hsp.annotations["positive"], 45) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 73], [ 0, 73]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 73)) self.assertEqual( repr(hsp.query.seq), "Seq('PDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVTKKAVDLGAS...RQV')", ) self.assertEqual(hsp.query.id, "Query_949527") self.assertEqual( hsp.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(73))" ) self.assertEqual( repr(hsp.target.seq), "Seq('PDLILLDIMLPGSDGLSSLEELRAKKSSESIPVIMATAKGTEFDKVKGLDMGAD...KAV')", ) self.assertEqual(hsp.target.id, "gi|2044197324|ref|XM_041762518.1|") self.assertEqual(hsp.target.name, "XM_041762518") self.assertEqual( hsp.target.description, "PREDICTED: Vulpes lagopus sensory transduction protein RegX3-like (LOC121495284), mRNA", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(681))" ) self.assertEqual( hsp.annotations["midline"], "PD+++LDI++P DGL+ LE LR + VIM TA G E K +D+GA +++KPF M ++ I+ V", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_949527 Length: 73 Strand: Plus NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome Target: gi|2044197324|ref|XM_041762518.1| Length: 73 Strand: Plus PREDICTED: Vulpes lagopus sensory transduction protein RegX3-like (LOC121495284), mRNA Score:67 bits(142), Expect:7e-08, Identities:30/73(41%), Positives:45/73(62%), Gaps:0.73(0%) gi|204419 0 PDLILLDIMLPGSDGLSSLEELRAKKSSESIPVIMATAKGTEFDKVKGLDMGADDYLVKP 0 ||...|||..|..|||..||.||.........|||.||.|.|....|..|.||.....|| Query_949 0 PDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVTKKAVDLGASYFILKP gi|204419 60 FGMMEMISRIKAV 73 60 |.|......|..| 73 Query_949 60 FDMENLVGHIRQV 73 """, ) hit = record[9] self.assertEqual(hit.num, 10) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gi|1101784196|ref|XM_019135081.1|") self.assertEqual(hit.target.name, "XM_019135081") self.assertEqual( hit.target.description, "Cryptococcus amylolentus CBS 6039 hypothetical protein (L202_01642), partial mRNA", ) self.assertEqual(repr(hit.target.seq), "Seq(None, length=5487)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 142.0) self.assertAlmostEqual(hsp.annotations["bit score"], 67.9658) self.assertAlmostEqual(hsp.annotations["evalue"], 6.97488e-08, places=13) self.assertEqual(hsp.annotations["identity"], 28) self.assertEqual(hsp.annotations["positive"], 50) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 82], [ 0, 82]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 82)) self.assertEqual( repr(hsp.query.seq), "Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...NLV')", ) self.assertEqual(hsp.query.id, "Query_949527") self.assertEqual( hsp.query.description, "NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome", ) self.assertEqual(len(hsp.query.features), 1) feature = hsp.query.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(82))" ) self.assertEqual( repr(hsp.target.seq), "Seq('ARDGREALELCAKQKPNLIISDVMMPHVDGFELLTTLKDSSEFRMIPVIMLTAR...EIV')", ) self.assertEqual(hsp.target.id, "gi|1101784196|ref|XM_019135081.1|") self.assertEqual(hsp.target.name, "XM_019135081") self.assertEqual( hsp.target.description, "Cryptococcus amylolentus CBS 6039 hypothetical protein (L202_01642), partial mRNA", ) self.assertEqual(len(hsp.target.features), 1) feature = hsp.target.features[0] self.assertEqual(feature.type, "CDS") location = feature.location self.assertEqual( repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(5487))" ) self.assertEqual( hsp.annotations["midline"], "A +G+E L L ++ P++++ D++MPH+DG +L L++S + VIMLTA G ++ + GA ++ KPF+ +V", ) self.assertEqual( repr(hsp), "", ) self.assertEqual( str(hsp), """\ Query : Query_949527 Length: 82 Strand: Plus NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome Target: gi|1101784196|ref|XM_019135081.1| Length: 82 Strand: Plus Cryptococcus amylolentus CBS 6039 hypothetical protein (L202_01642), partial mRNA Score:67 bits(142), Expect:7e-08, Identities:28/82(34%), Positives:50/82(61%), Gaps:0.82(0%) gi|110178 0 ARDGREALELCAKQKPNLIISDVMMPHVDGFELLTTLKDSSEFRMIPVIMLTARGADESK 0 |..|.|.|.|.....|.....|..|||.||...|..|..|.......||||||.|..... Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT gi|110178 60 VSGIMAGAEDYLAKPFNAREIV 82 60 ......||.....|||.....| 82 Query_949 60 KKAVDLGASYFILKPFDMENLV 82 """, ) def test_xml_21500_tblastx_001_writer(self): """Writing TBLASTX 2.15.0+ (xml_21500_tblastx_001.xml).""" filename = "xml_21500_tblastx_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_tblastx_001_records(written_records) def test_xml2_21500_tblastx_001_writer(self): """Writing TBLASTX 2.15.0+ XML2 (xml2_21500_tblastx_001.xml).""" filename = "xml2_21500_tblastx_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream, fmt="XML2") self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_tblastx_001_records(written_records, xml2=True) class TestRPSBlast(unittest.TestCase): """Test the Blast XML parser for rpsblast output.""" def test_xml_21500_rpsblast_001_parser(self): """Parsing RPSBLAST 2.15.0+ (xml_21500_rpsblast_001.xml).""" filename = "xml_21500_rpsblast_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_rpsblast_001_records(records) with Blast.parse(path) as records: self.check_xml_21500_rpsblast_001_records(records) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_rpsblast_001_record(record) record = Blast.read(path) self.check_xml_21500_rpsblast_001_record(record) def test_xml2_21500_rpsblast_001_parser(self): """Parsing RPSBLAST 2.15.0+ (xml2_21500_rpsblast_001.xml).""" filename = "xml2_21500_rpsblast_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_rpsblast_001_records(records, xml2=True) with Blast.parse(path) as records: self.check_xml_21500_rpsblast_001_records(records, xml2=True) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_rpsblast_001_record(record, xml2=True) record = Blast.read(path) self.check_xml_21500_rpsblast_001_record(record, xml2=True) def test_xml_21500_rpsblast_001_writer(self): """Writing rpsblast 2.15.0+ (xml_21500_rpsblast_001.xml).""" filename = "xml_21500_rpsblast_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_rpsblast_001_records(written_records) def test_xml2_21500_rpsblast_001_writer(self): """Writing rpsblast 2.9.0+ XML2 (xml2_21500_rpsblast_001_v2.xml).""" filename = "xml2_21500_rpsblast_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream, fmt="XML2") self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_rpsblast_001_records(written_records, xml2=True) def check_xml_21500_rpsblast_001_records(self, records, xml2=False): self.assertEqual(records.program, "rpsblast") self.assertEqual(records.version, "RPSBLAST 2.15.0+") self.assertEqual( records.reference, 'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.', ) self.assertEqual(records.db, "Cdd") if not xml2: self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "Query_1") self.assertEqual( records.query.description, "pdb|3FAJ|A Chain A, Structure of the structural protein P131 of the archaeal virus Acidianus Two-tailed virus (ATV)", ) self.assertEqual(repr(records.query.seq), "Seq(None, length=151)") self.assertEqual(records.param["matrix"], "BLOSUM62") self.assertAlmostEqual(records.param["expect"], 1) self.assertEqual(records.param["gap-open"], 11) self.assertEqual(records.param["gap-extend"], 1) self.assertEqual(records.param["filter"], "F") if xml2: self.assertEqual(records.param["cbs"], 1) self.assertEqual(len(records.param), 6) else: self.assertEqual(len(records.param), 5) record = next(records) self.assertRaises(StopIteration, next, records) self.check_xml_21500_rpsblast_001_record(record, xml2) def check_xml_21500_rpsblast_001_record(self, record, xml2=False): if not xml2: self.assertEqual(record.num, 1) self.assertEqual( repr(record), "", ) self.assertIsInstance(record.query, SeqRecord) self.assertEqual(record.query.id, "Query_1") self.assertEqual( record.query.description, "pdb|3FAJ|A Chain A, Structure of the structural protein P131 of the archaeal virus Acidianus Two-tailed virus (ATV)", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=151)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 59693) self.assertEqual(record.stat["db-len"], 13521240) self.assertEqual(record.stat["hsp-len"], 89) self.assertEqual(record.stat["eff-space"], 508930906) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 2) hit = record[0] self.assertEqual(hit.num, 1) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gnl|CDD|165101") self.assertEqual( hit.target.description, "PHA02734, PHA02734, coat protein; Provisional.", ) self.assertEqual(hit.target.name, "165101") self.assertEqual(repr(hit.target.seq), "Seq(None, length=149)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 520.0) self.assertAlmostEqual(hsp.annotations["bit score"], 204.685) self.assertAlmostEqual(hsp.annotations["evalue"], 9.29691e-69, places=74) self.assertEqual(hsp.annotations["identity"], 94) self.assertEqual(hsp.annotations["positive"], 103) self.assertEqual(hsp.annotations["gaps"], 18) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 77, 93, 110, 112, 149], [ 20, 97, 97, 114, 114, 151]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 149)) self.assertEqual( repr(hsp.query.seq), "Seq({20: 'MAKYEPKKGDYAGGAVKILDMFENGQLGYPEVTLKLAGEEANARRAGDERTKEA...MAS'}, length=151)", ) self.assertEqual(hsp.query.id, "Query_1") self.assertEqual( hsp.query.description, "pdb|3FAJ|A Chain A, Structure of the structural protein P131 of the archaeal virus Acidianus Two-tailed virus (ATV)", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq('MAKEEPKKGDYAGGAAKILDGFEAGQLGFPEVSLKLAGEEANARKAGDANAKAA...AAM')", ) self.assertEqual(hsp.target.id, "gnl|CDD|165101") self.assertEqual( hsp.target.description, "PHA02734, PHA02734, coat protein; Provisional.", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual( hsp.annotations["midline"], "MAK EPKKGDYAGGA KILD FE GQLG+PEV+LKLAGEEANAR+AGD K AIHAI+KMI DAMKP RNKG GF+ SQ IPGE+ AQV + E YQQAKAFLA+PA R + E LSKGAK LA A A ", ) hit = record[1] self.assertEqual(hit.num, 2) self.assertIsInstance(hit.target, SeqRecord) self.assertEqual(hit.target.id, "gnl|CDD|410801") self.assertEqual( hit.target.description, "cd20027, FH_FOXL1, Forkhead (FH) domain found in Forkhead box protein L1 (FOXL1) and similar proteins. FOXL1, also called Forkhead-related protein FKHL11 or Forkhead-related transcription factor 7 (FREAC-7), acts as a transcription factor required for proper proliferation and differentiation in the gastrointestinal epithelium. It may play a critical role in suppressing tumorigenesis. The FH domain is a winged helix DNA-binding domain. FOX transcription factors recognize the core sequence 5'-(A/C)AA(C/T)A-3'.", ) self.assertEqual(hit.target.name, "410801") self.assertEqual(repr(hit.target.seq), "Seq(None, length=98)") self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 65.0) self.assertAlmostEqual(hsp.annotations["bit score"], 29.0095) self.assertAlmostEqual(hsp.annotations["evalue"], 0.517343) self.assertEqual(hsp.annotations["identity"], 13) self.assertEqual(hsp.annotations["positive"], 15) self.assertEqual(hsp.annotations["gaps"], 4) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[63, 66, 66, 90], [14, 17, 21, 45]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 31)) self.assertEqual( repr(hsp.query.seq), "Seq({14: 'VPRGSHMAKYEPKKGDYAGGAVKILDMFENG'}, length=151)", ) self.assertEqual(hsp.query.id, "Query_1") self.assertEqual( hsp.query.description, "pdb|3FAJ|A Chain A, Structure of the structural protein P131 of the archaeal virus Acidianus Two-tailed virus (ATV)", ) self.assertEqual(len(hsp.query.features), 0) self.assertEqual( repr(hsp.target.seq), "Seq({63: 'VPREKGRPGKGNYWTLDPDCEEMFENG'}, length=98)", ) self.assertEqual(hsp.target.id, "gnl|CDD|410801") self.assertEqual( hsp.target.description, "cd20027, FH_FOXL1, Forkhead (FH) domain found in Forkhead box protein L1 (FOXL1) and similar proteins. FOXL1, also called Forkhead-related protein FKHL11 or Forkhead-related transcription factor 7 (FREAC-7), acts as a transcription factor required for proper proliferation and differentiation in the gastrointestinal epithelium. It may play a critical role in suppressing tumorigenesis. The FH domain is a winged helix DNA-binding domain. FOX transcription factors recognize the core sequence 5'-(A/C)AA(C/T)A-3'.", ) self.assertEqual(len(hsp.target.features), 0) self.assertEqual(hsp.annotations["midline"], "VPR K P KG+Y +MFENG") class TestPSIBlast(unittest.TestCase): """Test the Blast XML parser for psiblast output.""" def test_xml_21500_psiblast_001_parser(self): """Parsing PSIBLAST 2.15.0+ (xml_21500_psiblast_001.xml).""" filename = "xml_21500_psiblast_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_psiblast_001_records(records) with Blast.parse(path) as records: self.check_xml_21500_psiblast_001_records(records) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_psiblast_001_record(record) record = Blast.read(path) self.check_xml_21500_psiblast_001_record(record) def test_xml2_21500_psiblast_001_parser(self): """Parsing PSIBLAST 2.15.0+ (xml2_21500_psiblast_001.xml).""" filename = "xml2_21500_psiblast_001.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) self.check_xml_21500_psiblast_001_records(records, xml2=True) with Blast.parse(path) as records: self.check_xml_21500_psiblast_001_records(records, xml2=True) with open(path, "rb") as stream: record = Blast.read(stream) self.check_xml_21500_psiblast_001_record(record, xml2=True) record = Blast.read(path) self.check_xml_21500_psiblast_001_record(record, xml2=True) def test_xml_21500_psiblast_001_writer(self): """Writing psiblast 2.15.0+ (xml_21500_psiblast_001.xml).""" filename = "xml_21500_psiblast_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream) self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_psiblast_001_records(written_records) def test_xml2_21500_psiblast_001_writer(self): """Writing psiblast 2.9.0+ XML2 (xml2_21500_psiblast_001_v2.xml).""" filename = "xml2_21500_psiblast_001.xml" path = os.path.join("Blast", filename) with Blast.parse(path) as records: stream = io.BytesIO() n = Blast.write(records, stream, fmt="XML2") self.assertEqual(n, 1) stream.seek(0) written_records = Blast.parse(stream) self.check_xml_21500_psiblast_001_records(written_records, xml2=True) def check_xml_21500_psiblast_001_records(self, records, xml2=False): self.assertEqual(records.program, "psiblast") self.assertEqual(records.version, "PSIBLAST 2.15.0+") self.assertEqual( records.reference, 'Alejandro A. Schäffer, L. Aravind, Thomas L. Madden, Sergei Shavirin, John L. Spouge, Yuri I. Wolf, Eugene V. Koonin, and Stephen F. Altschul (2001), "Improving the accuracy of PSI-BLAST protein database searches with composition-based statistics and other refinements", Nucleic Acids Res. 29:2994-3005.', ) self.assertEqual(records.db, "swissprot") if not xml2: self.assertIsInstance(records.query, SeqRecord) self.assertEqual(records.query.id, "Query_1") self.assertEqual( records.query.description, "WP_001234791.1 Sec-independent protein translocase subunit TatA [Shigella flexneri]", ) self.assertEqual(repr(records.query.seq), "Seq(None, length=103)") self.assertEqual(records.param["matrix"], "BLOSUM62") self.assertAlmostEqual(records.param["expect"], 1e-30, places=36) self.assertEqual(records.param["gap-open"], 11) self.assertEqual(records.param["gap-extend"], 1) self.assertEqual(records.param["filter"], "F") if xml2: self.assertEqual(records.param["cbs"], 2) self.assertEqual(len(records.param), 6) else: self.assertEqual(len(records.param), 5) record = next(records) self.assertRaises(StopIteration, next, records) self.check_xml_21500_psiblast_001_record(record, xml2) def check_xml_21500_psiblast_001_record(self, record, xml2=False): self.assertEqual(record.num, 1) self.assertEqual( repr(record), "" % record.query.id, ) self.assertIsInstance(record.query, SeqRecord) if xml2: self.assertEqual(record.query.id, "lcl|Query_1") else: self.assertEqual(record.query.id, "Query_1") self.assertEqual( record.query.description, "WP_001234791.1 Sec-independent protein translocase subunit TatA [Shigella flexneri]", ) self.assertEqual(repr(record.query.seq), "Seq(None, length=103)") self.assertEqual(len(record.stat), 7) self.assertEqual(record.stat["db-num"], 482816) self.assertEqual(record.stat["db-len"], 183558113) self.assertEqual(record.stat["hsp-len"], 72) self.assertEqual(record.stat["eff-space"], 4627826878) self.assertAlmostEqual(record.stat["kappa"], 0.041) self.assertAlmostEqual(record.stat["lambda"], 0.267) self.assertAlmostEqual(record.stat["entropy"], 0.14) self.assertEqual(len(record), 2) hit = record[0] self.assertEqual(hit.num, 1) target = hit.target self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "sp|P69428.1|") self.assertEqual(target.name, "P69428") seq = target.seq self.assertEqual(repr(seq), "Seq(None, length=89)") if xml2: self.assertEqual( target.description, "RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli K-12]", ) self.assertEqual(target.annotations["taxid"], 83333) self.assertIs(target, hit.targets[0]) self.assertEqual(len(hit.targets), 4) target = hit.targets[1] self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "sp|P69429.1|") self.assertEqual(target.name, "P69429") self.assertIs(target.seq, seq) self.assertEqual( target.description, "RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli CFT073]", ) self.assertEqual(target.annotations["taxid"], 199310) target = hit.targets[2] self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "sp|P69430.1|") self.assertEqual(target.name, "P69430") self.assertIs(target.seq, seq) self.assertEqual( target.description, "RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli O157:H7]", ) self.assertEqual(target.annotations["taxid"], 83334) target = hit.targets[3] self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "sp|P69431.1|") self.assertEqual(target.name, "P69431") self.assertIs(target.seq, seq) self.assertEqual( target.description, "RecName: Full=Sec-independent protein translocase protein TatA [Shigella flexneri]", ) self.assertEqual(target.annotations["taxid"], 623) else: self.assertEqual( target.description, "RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli K-12] >sp|P69429.1| RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli CFT073] >sp|P69430.1| RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli O157:H7] >sp|P69431.1| RecName: Full=Sec-independent protein translocase protein TatA [Shigella flexneri]", ) self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 448.0) self.assertAlmostEqual(hsp.annotations["bit score"], 177.178) self.assertAlmostEqual(hsp.annotations["evalue"], 2.3039e-58, places=64) self.assertEqual(hsp.annotations["identity"], 89) if xml2: self.assertEqual(hsp.shape, (2, 0)) else: self.assertEqual(hsp.annotations["positive"], 89) self.assertEqual(hsp.annotations["gaps"], 0) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 89], [14, 103]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 89)) if xml2: self.assertEqual(repr(hsp.query.seq), "Seq(None, length=103)") self.assertEqual(hsp.query.id, "lcl|Query_1") else: self.assertEqual( repr(hsp.query.seq), "Seq({14: 'MGGISIWQLLIIAVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDDEPKQDKTS...EQV'}, length=103)", ) self.assertEqual(hsp.query.id, "Query_1") self.assertEqual( hsp.query.description, "WP_001234791.1 Sec-independent protein translocase subunit TatA [Shigella flexneri]", ) self.assertEqual(len(hsp.query.features), 0) if xml2: self.assertEqual( repr(hsp.target.seq), "Seq(None, length=89)", ) else: self.assertEqual( repr(hsp.target.seq), "Seq('MGGISIWQLLIIAVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDDEPKQDKTS...EQV')", ) self.assertEqual(hsp.target.id, "sp|P69428.1|") if xml2: self.assertEqual( hsp.target.description, "RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli K-12]", ) else: self.assertEqual( hsp.target.description, "RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli K-12] >sp|P69429.1| RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli CFT073] >sp|P69430.1| RecName: Full=Sec-independent protein translocase protein TatA [Escherichia coli O157:H7] >sp|P69431.1| RecName: Full=Sec-independent protein translocase protein TatA [Shigella flexneri]", ) self.assertEqual(len(hsp.target.features), 0) if not xml2: self.assertEqual( hsp.annotations["midline"], "MGGISIWQLLIIAVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDDEPKQDKTSQDADFTAKTIADKQADTNQEQAKTEDAKRHDKEQV", ) hit = record[1] self.assertEqual(hit.num, 2) target = hit.target self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "sp|P0A2H3.1|") self.assertEqual(target.name, "P0A2H3") seq = target.seq self.assertEqual(repr(seq), "Seq(None, length=84)") if xml2: self.assertEqual( target.description, "RecName: Full=Sec-independent protein translocase protein TatA [Salmonella enterica subsp. enterica serovar Typhimurium str. LT2]", ) self.assertEqual(target.annotations["taxid"], 99287) self.assertIs(target, hit.targets[0]) self.assertEqual(len(hit.targets), 2) target = hit.targets[1] self.assertIsInstance(target, SeqRecord) self.assertEqual(target.id, "sp|P0A2H4.1|") self.assertEqual(target.name, "P0A2H4") self.assertIs(target.seq, seq) self.assertEqual( target.description, "RecName: Full=Sec-independent protein translocase protein TatA [Salmonella enterica subsp. enterica serovar Typhi]", ) self.assertEqual(target.annotations["taxid"], 90370) else: self.assertEqual( hit.target.description, "RecName: Full=Sec-independent protein translocase protein TatA [Salmonella enterica subsp. enterica serovar Typhimurium str. LT2] >sp|P0A2H4.1| RecName: Full=Sec-independent protein translocase protein TatA [Salmonella enterica subsp. enterica serovar Typhi]", ) self.assertEqual(len(hit), 1) hsp = hit[0] self.assertEqual(hsp.num, 1) self.assertAlmostEqual(hsp.score, 358.0) self.assertAlmostEqual(hsp.annotations["bit score"], 142.51) self.assertAlmostEqual(hsp.annotations["evalue"], 1.0691e-44, places=50) self.assertEqual(hsp.annotations["identity"], 75) if xml2: self.assertEqual(hsp.shape, (2, 0)) else: self.assertEqual(hsp.annotations["positive"], 79) self.assertEqual(hsp.annotations["gaps"], 5) self.assertTrue( np.array_equal( hsp.coordinates, # fmt: off np.array([[ 0, 69, 69, 84], [ 14, 83, 88, 103]]) # fmt: on ) ) self.assertEqual(hsp.shape, (2, 89)) if xml2: self.assertEqual(repr(hsp.query.seq), "Seq(None, length=103)") else: self.assertEqual( repr(hsp.query.seq), "Seq({14: 'MGGISIWQLLIIAVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDDEPKQDKTS...EQV'}, length=103)", ) if xml2: self.assertEqual(hsp.query.id, "lcl|Query_1") else: self.assertEqual(hsp.query.id, "Query_1") self.assertEqual( hsp.query.description, "WP_001234791.1 Sec-independent protein translocase subunit TatA [Shigella flexneri]", ) self.assertEqual(len(hsp.query.features), 0) if xml2: self.assertEqual(repr(hsp.target.seq), "Seq(None, length=84)") else: self.assertEqual( repr(hsp.target.seq), "Seq('MGGISIWQLLIVAVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDDDAKQDKTS...EQV')", ) self.assertEqual(hsp.target.id, "sp|P0A2H3.1|") if xml2: self.assertEqual( hsp.target.description, "RecName: Full=Sec-independent protein translocase protein TatA [Salmonella enterica subsp. enterica serovar Typhimurium str. LT2]", ) else: self.assertEqual( hsp.target.description, "RecName: Full=Sec-independent protein translocase protein TatA [Salmonella enterica subsp. enterica serovar Typhimurium str. LT2] >sp|P0A2H4.1| RecName: Full=Sec-independent protein translocase protein TatA [Salmonella enterica subsp. enterica serovar Typhi]", ) self.assertEqual(len(hsp.target.features), 0) if not xml2: self.assertEqual( hsp.annotations["midline"], "MGGISIWQLLI+AVIVVLLFGTKKLGSIGSDLGASIKGFKKAMSDD+ KQDKTSQDADFTAK+IADKQ +AK EDAK DKEQV", ) class TestBlastErrors(unittest.TestCase): """Tests if the Blast XML parser raises the appropriate Exception.""" def test_not_xml(self): """Try to parse a FASTA file.""" message = "Failed to parse the XML data (syntax error: line 1, column 0). Please make sure that the input data are in XML format." filename = "wisteria.nu" path = os.path.join("Fasta", filename) with open(path, "rb") as stream: with self.assertRaises(Blast.NotXMLError) as cm: records = Blast.parse(stream) self.assertEqual(str(cm.exception), message) with self.assertRaises(Blast.NotXMLError) as cm: records = Blast.parse(path) self.assertEqual(str(cm.exception), message) def test_premature_end_header(self): """Try to parse an XML file terminating in the header.""" message = r"^premature end of XML file: line [0-9]\d*, column [0-9]\d*$" filename = "broken1.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: with self.assertRaises(ValueError) as cm: records = Blast.parse(stream) self.assertRegex(str(cm.exception), message) with self.assertRaises(ValueError) as cm: records = Blast.parse(path) self.assertRegex(str(cm.exception), message) with open(path, "rb") as stream: with self.assertRaises(ValueError) as cm: records = Blast.read(stream) self.assertRegex(str(cm.exception), message) with self.assertRaises(ValueError) as cm: records = Blast.read(path) self.assertRegex(str(cm.exception), message) def test_premature_end_first_block(self): """Try to parse an XML file terminating within the first block.""" message = r"^premature end of XML file: line [0-9]\d*, column [0-9]\d*$" filename = "broken2.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) with self.assertRaises(ValueError) as cm: record = next(records) self.assertRegex(str(cm.exception), message) with Blast.parse(path) as records: with self.assertRaises(ValueError) as cm: record = next(records) self.assertRegex(str(cm.exception), message) with open(path, "rb") as stream: with self.assertRaises(ValueError) as cm: record = Blast.read(stream) self.assertRegex(str(cm.exception), message) with self.assertRaises(ValueError) as cm: record = Blast.read(path) self.assertRegex(str(cm.exception), message) def test_premature_end_second_block(self): """Try to parse an XML file terminating in the second block.""" message = r"^premature end of XML file: line [0-9]\d*, column [0-9]\d*$" filename = "broken3.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) with self.assertRaises(ValueError) as cm: record = next(records) self.assertRegex(str(cm.exception), message) with Blast.parse(path) as records: with self.assertRaises(ValueError) as cm: record = next(records) self.assertRegex(str(cm.exception), message) with open(path, "rb") as stream: with self.assertRaises(ValueError) as cm: record = Blast.read(stream) self.assertRegex(str(cm.exception), message) with self.assertRaises(ValueError) as cm: record = Blast.read(path) self.assertRegex(str(cm.exception), message) def test_premature_end_after_one_record(self): """Try to parse an XML file terminating after the first record.""" message = r"^premature end of XML file: line [0-9]\d*, column [0-9]\d*$" filename = "broken4.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: records = Blast.parse(stream) record = next(records) with self.assertRaises(ValueError) as cm: record = next(records) self.assertRegex(str(cm.exception), message) with Blast.parse(path) as records: record = next(records) with self.assertRaises(ValueError) as cm: record = next(records) self.assertRegex(str(cm.exception), message) with open(path, "rb") as stream: with self.assertRaises(ValueError) as cm: record = Blast.read(stream) self.assertRegex(str(cm.exception), message) with self.assertRaises(ValueError) as cm: record = Blast.read(path) self.assertRegex(str(cm.exception), message) def test_corrupt_xml(self): """Try to parse a broken XML file.""" message = "Failed to parse the XML data (not well-formed (invalid token): line 10, column 2). Please make sure that the input data are not corrupted." filename = "broken5.xml" path = os.path.join("Blast", filename) with open(path, "rb") as stream: with self.assertRaises(Blast.CorruptedXMLError) as cm: records = Blast.parse(stream) self.assertEqual(str(cm.exception), message) with self.assertRaises(Blast.CorruptedXMLError) as cm: with Blast.parse(path): pass self.assertEqual(str(cm.exception), message) with open(path, "rb") as stream: with self.assertRaises(Blast.CorruptedXMLError) as cm: record = Blast.read(stream) self.assertEqual(str(cm.exception), message) with self.assertRaises(Blast.CorruptedXMLError) as cm: record = Blast.read(path) self.assertEqual(str(cm.exception), message) if __name__ == "__main__": runner = unittest.TextTestRunner(verbosity=2) unittest.main(testRunner=runner)