mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Add test cases for blat-psl with protein queries and negative strand hits
This commit is contained in:
@ -13,10 +13,17 @@ psl_34_002.psl PSL format, single query, no hits
|
||||
psl_34_003.psl PSL format, single query, hits with single HSP
|
||||
psl_34_004.psl PSL format, single query, hits with multiple HSPs
|
||||
psl_34_005.psl PSL format, multiple queries, no header
|
||||
psl_35_001.psl PSL format, protein query
|
||||
|
||||
pslx_34_001.pslx PSLX format, multiple queries
|
||||
pslx_34_002.pslx PSLX format, single query, no hits
|
||||
pslx_34_003.pslx PSLX format, single query, hits with single HSP
|
||||
pslx_34_004.pslx PSLX format, single query, hits with multiple HSPs
|
||||
pslx_34_005.pslx PSLX format, multiple queries, no header
|
||||
|
||||
|
||||
BLAT v35
|
||||
--------
|
||||
psl_35_001.psl PSL format, protein query
|
||||
psl_35_002.psl PSL format, protein query with hits on negative strand
|
||||
|
||||
psl_35_002.pslx PSLX format, protein query with hits on negative strand
|
||||
|
8
Tests/Blat/psl_35_002.psl
Normal file
8
Tests/Blat/psl_35_002.psl
Normal file
@ -0,0 +1,8 @@
|
||||
psLayout version 3
|
||||
|
||||
match mis- rep. N's Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStarts tStarts
|
||||
match match count bases count bases name size start end name size start end count
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
210 3 0 0 0 0 6 31299 ++ CAG33136.1 230 17 230 KI537979 14052872 9712654 9744592 7 44,52,29,16,25,20,27, 17,61,113,142,158,183,203, 9712654,9715941,9716445,9718374,9739264,9743706,9744511,
|
||||
207 22 0 0 1 1 1 -1 ++ CAG33136.1 230 0 230 KI538594 7819582 2103463 2104149 2 20,209, 0,21, 2103463,2103522,
|
||||
204 6 0 0 1 20 1 1 +- CAG33136.1 230 0 230 KI537194 37111980 20872390 20873021 2 183,27, 0,203, 16238959,16239509,
|
8
Tests/Blat/pslx_35_002.pslx
Normal file
8
Tests/Blat/pslx_35_002.pslx
Normal file
@ -0,0 +1,8 @@
|
||||
psLayout version 3
|
||||
|
||||
match mis- rep. N's Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStarts tStarts
|
||||
match match count bases count bases name size start end name size start end count
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
210 3 0 0 0 0 6 31299 ++ CAG33136.1 230 17 230 KI537979 14052872 9712654 9744592 7 44,52,29,16,25,20,27, 17,61,113,142,158,183,203, 9712654,9715941,9716445,9718374,9739264,9743706,9744511, QFLKQLGLHPNWQFVDVYGMDPELLSMVPRPVCAVLLLFPITEK,YEVFRTEEEEKIKSQGQDVTSSVYFMKQTISNACGTIGLIHAIANNKDKMHF,ESGSTLKKFLEESVSMSPEERARYLENYD,AIRVTHETSAHEGQTE,APSIDEKVDLHFIALVHVDGHLYEL,DGRKPFPINHGETSDETLLE,DAIEVCKKFMERDPDELRFNAIALSAA, QFLKQLGLHPNWQFVDVYGMDPELLSMVPRPVCAVLLLFPITEK,YEIFRTEEEEKIKSQGQDVTSSVYFMKQTISNACGTIGLIHAIANNKDKMHF,ESGSTLKKFLEESASMSPEERARYLENYD,AIRVTHETSAHEGQTE,APNIDEKVDLHFIALVHVDGHLYEL,DGRKPFPINHGETSDETLLE,DAIEVCKKFMERDPDELRFNAIALSAA,
|
||||
207 22 0 0 1 1 1 -1 ++ CAG33136.1 230 0 230 KI538594 7819582 2103463 2104149 2 20,209, 0,21, 2103463,2103522, MEGQRWLPLEANPEVTNQFL,QLGLHPNWQFVDVYGMDPELLSMVPRPVCAVLLLFPITEKYEVFRTEEEEKIKSQGQDVTSSVYFMKQTISNACGTIGLIHAIANNKDKMHFESGSTLKKFLEESVSMSPEERARYLENYDAIRVTHETSAHEGQTEAPSIDEKVDLHFIALVHVDGHLYELDGRKPFPINHGETSDETLLEDAIEVCKKFMERDPDELRFNAIALSAA, MEGQCWLPLEANPEVTNQLL,QLGLHPNWQFVDVYGMDPELLSMVPRPVCAVLLLFPITEKYEVFRTEEEEKIKSQGQNITSSGYFMRQTISSACGTIGLIHAIANNKDKMHFESGSTLKKFLEESASLSPEERAIYLENYDSIRVTHKTSDHEGQTEAQNIDEKVDLHFIALVHVDGHLYELDGWKPFPINHGETSDATLLRDAIEVFKKFRERDPDERRFNVIALSAA,
|
||||
204 6 0 0 1 20 1 1 +- CAG33136.1 230 0 230 KI537194 37111980 20872390 20873021 2 183,27, 0,203, 16238959,16239509, MEGQRWLPLEANPEVTNQFLKQLGLHPNWQFVDVYGMDPELLSMVPRPVCAVLLLFPITEKYEVFRTEEEEKIKSQGQDVTSSVYFMKQTISNACGTIGLIHAIANNKDKMHFESGSTLKKFLEESVSMSPEERARYLENYDAIRVTHETSAHEGQTEAPSIDEKVDLHFIALVHVDGHLYEL,DAIEVCKKFMERDPDELRFNAIALSAA, MESQRWLPLEANPEVTNQFLKQLGLHPNWQCVDVYGMDPELLSMVPRPVCAVLLLFPITEKYEIFRTEEEEKTKSQGQDVTSSVYFMKQTISNACGTIGLIHAIANNKDKMHFESGSTLKKFLEESASMSPEERARYLENYDAIRVTHETSAHEGQTEAPNIDEKVDLHFIALVHVDGHLYEL,DAIEVCKKFMERDPDELRFNAIALSAA,
|
@ -1018,6 +1018,52 @@ class BlatPslCases(unittest.TestCase):
|
||||
self.assertEqual([(61, 113)], hsp.query_range_all)
|
||||
self.assertEqual([(75566694, 75566850)], hsp.hit_range_all)
|
||||
|
||||
def test_psl_35_002(self, testf='psl_35_002.psl', pslx=False):
    """Test parsing blat output (psl_35_002.psl)"""
    # Parsed results are stored on the instance so that a PSLX subclass test
    # can call this method and then inspect the same results further.
    self.qresults = list(parse(get_file(testf), FMT, pslx=pslx))
    self.assertEqual(1, len(self.qresults))

    # check common attributes: IDs must agree across qresult -> hit -> hsp
    for result in self.qresults:
        for target in result:
            self.assertEqual(result.id, target.query_id)
            for alignment in target:
                self.assertEqual(target.id, alignment.hit_id)
                self.assertEqual(result.id, alignment.query_id)

    # test first qresult
    first_result = self.qresults[0]
    self.assertEqual('CAG33136.1', first_result.id)
    self.assertEqual('blat', first_result.program)
    self.assertEqual(230, first_result.seq_len)
    self.assertEqual(3, len(first_result))

    # first qresult, last hit
    last_hit = first_result[-1]
    self.assertEqual('KI537194', last_hit.id)
    self.assertEqual(37111980, last_hit.seq_len)
    self.assertEqual(1, len(last_hit.hsps))

    # first qresult, last hit, its only hsp
    last_hsp = last_hit.hsps[-1]

    # match / gap counters
    expected_counts = (
        ('match_num', 204),
        ('match_rep_num', 0),
        ('mismatch_num', 6),
        ('n_num', 0),
        ('query_gapopen_num', 1),
        ('query_gap_num', 20),
        ('hit_gapopen_num', 1),
        ('hit_gap_num', 1),
    )
    for attr_name, expected in expected_counts:
        self.assertEqual(expected, getattr(last_hsp, attr_name))

    # strands are checked on the first fragment of the hsp
    first_fragment = last_hsp[0]
    self.assertEqual(0, first_fragment.query_strand)
    self.assertEqual(-1, first_fragment.hit_strand)

    # overall coordinates of the hsp
    expected_coords = (
        ('query_start', 0),
        ('hit_start', 20872390),
        ('query_end', 230),
        ('hit_end', 20873021),
    )
    for attr_name, expected in expected_coords:
        self.assertEqual(expected, getattr(last_hsp, attr_name))

    # per-block spans and ranges (two blocks)
    self.assertEqual(2, len(last_hsp))
    self.assertEqual([183, 27], last_hsp.query_span_all)
    self.assertEqual([549, 81], last_hsp.hit_span_all)
    self.assertEqual([(0, 183), (203, 230)], last_hsp.query_range_all)
    self.assertEqual(
        [(20872472, 20873021), (20872390, 20872471)],
        last_hsp.hit_range_all,
    )
|
||||
|
||||
|
||||
class BlatPslxCases(BlatPslCases):
|
||||
|
||||
@ -1215,6 +1261,22 @@ class BlatPslxCases(BlatPslCases):
|
||||
self.assertEqual('tgggattacaggtgtgagccaccacgcccagcccctttg', str(hsp.query_all[0].seq))
|
||||
self.assertEqual('tgggatgacaggggtgaggcaccacgcccagcccctttg', str(hsp.hit_all[0].seq))
|
||||
|
||||
def test_pslx_35_002(self, testf='pslx_35_002.pslx'):
    """Test parsing blat output (pslx_35_002.pslx)"""
    # Run the shared PSL assertions against the PSLX file first. The testf
    # argument is forwarded (rather than repeating the file name literal) so
    # that a caller-supplied file name is actually used.
    BlatPslCases.test_psl_35_002(self, testf, pslx=True)

    # first qresult, last hit, first hsp
    qresult = self.qresults[0]
    hsp = qresult[-1].hsps[0]

    # query block sequences; only the first and last 35 residues of the
    # first block are compared, the second block is compared in full
    self.assertEqual('MEGQRWLPLEANPEVTNQFLKQLGLHPNWQFVDVY', str(hsp.query_all[0].seq)[:35])
    self.assertEqual('ETSAHEGQTEAPSIDEKVDLHFIALVHVDGHLYEL', str(hsp.query_all[0].seq)[-35:])
    self.assertEqual('DAIEVCKKFMERDPDELRFNAIALSAA', str(hsp.query_all[1].seq))

    # hit block sequences, compared the same way
    self.assertEqual('MESQRWLPLEANPEVTNQFLKQLGLHPNWQCVDVY', str(hsp.hit_all[0].seq)[:35])
    self.assertEqual('ETSAHEGQTEAPNIDEKVDLHFIALVHVDGHLYEL', str(hsp.hit_all[0].seq)[-35:])
    self.assertEqual('DAIEVCKKFMERDPDELRFNAIALSAA', str(hsp.hit_all[1].seq))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
runner = unittest.TextTestRunner(verbosity=2)
|
||||
|
Reference in New Issue
Block a user