Add test cases for blat-psl with protein queries and negative strand hits

This commit is contained in:
bow
2017-05-18 08:24:24 +02:00
committed by Wibowo Arindrarto
parent 629668722d
commit 4c039a5ce4
4 changed files with 86 additions and 1 deletions

View File

@ -13,10 +13,17 @@ psl_34_002.psl PSL format, single query, no hits
psl_34_003.psl PSL format, single query, hits with single HSP
psl_34_004.psl PSL format, single query, hits with multiple HSPs
psl_34_005.psl PSL format, multiple queries, no header
psl_35_001.psl PSL format, protein query
pslx_34_001.pslx PSLX format, multiple queries
pslx_34_002.pslx PSLX format, single query, no hits
pslx_34_003.pslx PSLX format, single query, hits with single HSP
pslx_34_004.pslx PSLX format, single query, hits with multiple HSPs
pslx_34_005.pslx PSLX format, multiple queries, no header
BLAT v35
--------
psl_35_001.psl PSL format, protein query
psl_35_002.psl PSL format, protein query with hits on negative strand
pslx_35_002.pslx PSLX format, protein query with hits on negative strand

View File

@ -0,0 +1,8 @@
psLayout version 3
match mis- rep. N's Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStarts tStarts
match match count bases count bases name size start end name size start end count
---------------------------------------------------------------------------------------------------------------------------------------------------------------
210 3 0 0 0 0 6 31299 ++ CAG33136.1 230 17 230 KI537979 14052872 9712654 9744592 7 44,52,29,16,25,20,27, 17,61,113,142,158,183,203, 9712654,9715941,9716445,9718374,9739264,9743706,9744511,
207 22 0 0 1 1 1 -1 ++ CAG33136.1 230 0 230 KI538594 7819582 2103463 2104149 2 20,209, 0,21, 2103463,2103522,
204 6 0 0 1 20 1 1 +- CAG33136.1 230 0 230 KI537194 37111980 20872390 20873021 2 183,27, 0,203, 16238959,16239509,

View File

@ -0,0 +1,8 @@
psLayout version 3
match mis- rep. N's Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStarts tStarts
match match count bases count bases name size start end name size start end count
---------------------------------------------------------------------------------------------------------------------------------------------------------------
210 3 0 0 0 0 6 31299 ++ CAG33136.1 230 17 230 KI537979 14052872 9712654 9744592 7 44,52,29,16,25,20,27, 17,61,113,142,158,183,203, 9712654,9715941,9716445,9718374,9739264,9743706,9744511, QFLKQLGLHPNWQFVDVYGMDPELLSMVPRPVCAVLLLFPITEK,YEVFRTEEEEKIKSQGQDVTSSVYFMKQTISNACGTIGLIHAIANNKDKMHF,ESGSTLKKFLEESVSMSPEERARYLENYD,AIRVTHETSAHEGQTE,APSIDEKVDLHFIALVHVDGHLYEL,DGRKPFPINHGETSDETLLE,DAIEVCKKFMERDPDELRFNAIALSAA, QFLKQLGLHPNWQFVDVYGMDPELLSMVPRPVCAVLLLFPITEK,YEIFRTEEEEKIKSQGQDVTSSVYFMKQTISNACGTIGLIHAIANNKDKMHF,ESGSTLKKFLEESASMSPEERARYLENYD,AIRVTHETSAHEGQTE,APNIDEKVDLHFIALVHVDGHLYEL,DGRKPFPINHGETSDETLLE,DAIEVCKKFMERDPDELRFNAIALSAA,
207 22 0 0 1 1 1 -1 ++ CAG33136.1 230 0 230 KI538594 7819582 2103463 2104149 2 20,209, 0,21, 2103463,2103522, MEGQRWLPLEANPEVTNQFL,QLGLHPNWQFVDVYGMDPELLSMVPRPVCAVLLLFPITEKYEVFRTEEEEKIKSQGQDVTSSVYFMKQTISNACGTIGLIHAIANNKDKMHFESGSTLKKFLEESVSMSPEERARYLENYDAIRVTHETSAHEGQTEAPSIDEKVDLHFIALVHVDGHLYELDGRKPFPINHGETSDETLLEDAIEVCKKFMERDPDELRFNAIALSAA, MEGQCWLPLEANPEVTNQLL,QLGLHPNWQFVDVYGMDPELLSMVPRPVCAVLLLFPITEKYEVFRTEEEEKIKSQGQNITSSGYFMRQTISSACGTIGLIHAIANNKDKMHFESGSTLKKFLEESASLSPEERAIYLENYDSIRVTHKTSDHEGQTEAQNIDEKVDLHFIALVHVDGHLYELDGWKPFPINHGETSDATLLRDAIEVFKKFRERDPDERRFNVIALSAA,
204 6 0 0 1 20 1 1 +- CAG33136.1 230 0 230 KI537194 37111980 20872390 20873021 2 183,27, 0,203, 16238959,16239509, MEGQRWLPLEANPEVTNQFLKQLGLHPNWQFVDVYGMDPELLSMVPRPVCAVLLLFPITEKYEVFRTEEEEKIKSQGQDVTSSVYFMKQTISNACGTIGLIHAIANNKDKMHFESGSTLKKFLEESVSMSPEERARYLENYDAIRVTHETSAHEGQTEAPSIDEKVDLHFIALVHVDGHLYEL,DAIEVCKKFMERDPDELRFNAIALSAA, MESQRWLPLEANPEVTNQFLKQLGLHPNWQCVDVYGMDPELLSMVPRPVCAVLLLFPITEKYEIFRTEEEEKTKSQGQDVTSSVYFMKQTISNACGTIGLIHAIANNKDKMHFESGSTLKKFLEESASMSPEERARYLENYDAIRVTHETSAHEGQTEAPNIDEKVDLHFIALVHVDGHLYEL,DAIEVCKKFMERDPDELRFNAIALSAA,

View File

@ -1018,6 +1018,52 @@ class BlatPslCases(unittest.TestCase):
self.assertEqual([(61, 113)], hsp.query_range_all)
self.assertEqual([(75566694, 75566850)], hsp.hit_range_all)
def test_psl_35_002(self, testf='psl_35_002.psl', pslx=False):
    """Test parsing blat output (psl_35_002.psl)

    Parses a single protein query (CAG33136.1) with three hits and checks
    the last hit in detail: its only HSP has ``hit_strand == -1`` and its
    hit ranges are expected in ascending (start, end) form.

    ``testf`` is the fixture file name handed to ``get_file``; ``pslx`` is
    forwarded to the parser unchanged.
    """
    # The parsed results are kept on the instance because the PSLX variant
    # of this test reads ``self.qresults`` again after delegating here.
    self.qresults = list(parse(get_file(testf), FMT, pslx=pslx))
    self.assertEqual(1, len(self.qresults))

    # every hit and HSP must carry the IDs of the objects that contain it
    for result in self.qresults:
        for member_hit in result:
            self.assertEqual(result.id, member_hit.query_id)
            for member_hsp in member_hit:
                self.assertEqual(member_hit.id, member_hsp.hit_id)
                self.assertEqual(result.id, member_hsp.query_id)

    # the one and only query result
    result = self.qresults[0]
    self.assertEqual('CAG33136.1', result.id)
    self.assertEqual('blat', result.program)
    self.assertEqual(230, result.seq_len)
    self.assertEqual(3, len(result))

    # last hit of that query result
    last_hit = result[-1]
    self.assertEqual('KI537194', last_hit.id)
    self.assertEqual(37111980, last_hit.seq_len)
    self.assertEqual(1, len(last_hit.hsps))

    # the single HSP of the last hit: match/gap counters first
    last_hsp = last_hit.hsps[-1]
    self.assertEqual(204, last_hsp.match_num)
    self.assertEqual(0, last_hsp.match_rep_num)
    self.assertEqual(6, last_hsp.mismatch_num)
    self.assertEqual(0, last_hsp.n_num)
    self.assertEqual(1, last_hsp.query_gapopen_num)
    self.assertEqual(20, last_hsp.query_gap_num)
    self.assertEqual(1, last_hsp.hit_gapopen_num)
    self.assertEqual(1, last_hsp.hit_gap_num)

    # strands are checked on the first fragment of the HSP
    leading_fragment = last_hsp[0]
    self.assertEqual(0, leading_fragment.query_strand)
    self.assertEqual(-1, leading_fragment.hit_strand)

    # overall coordinates of the HSP
    self.assertEqual(0, last_hsp.query_start)
    self.assertEqual(20872390, last_hsp.hit_start)
    self.assertEqual(230, last_hsp.query_end)
    self.assertEqual(20873021, last_hsp.hit_end)

    # per-block spans and ranges (two blocks)
    self.assertEqual(2, len(last_hsp))
    self.assertEqual([183, 27], last_hsp.query_span_all)
    self.assertEqual([549, 81], last_hsp.hit_span_all)
    self.assertEqual([(0, 183), (203, 230)], last_hsp.query_range_all)
    self.assertEqual(
        [(20872472, 20873021), (20872390, 20872471)],
        last_hsp.hit_range_all,
    )
class BlatPslxCases(BlatPslCases):
@ -1215,6 +1261,22 @@ class BlatPslxCases(BlatPslCases):
self.assertEqual('tgggattacaggtgtgagccaccacgcccagcccctttg', str(hsp.query_all[0].seq))
self.assertEqual('tgggatgacaggggtgaggcaccacgcccagcccctttg', str(hsp.hit_all[0].seq))
def test_pslx_35_002(self, testf='pslx_35_002.pslx'):
    """Test parsing blat output (pslx_35_002.pslx)

    Runs every check of ``BlatPslCases.test_psl_35_002`` against the PSLX
    fixture, then verifies the block sequences that only the PSLX format
    carries for the last hit's HSP.

    ``testf`` is the fixture file name to parse.
    """
    # Forward ``testf`` instead of repeating the literal file name, so a
    # caller that overrides the argument really gets that file parsed.
    BlatPslCases.test_psl_35_002(self, testf, pslx=True)

    # first qresult, last hit, first hsp (results were stored on ``self``
    # by the delegated PSL test above)
    qresult = self.qresults[0]
    hsp = qresult[-1].hsps[0]

    # query side: the first block is long, so only its two ends are compared
    first_query_block = str(hsp.query_all[0].seq)
    self.assertEqual('MEGQRWLPLEANPEVTNQFLKQLGLHPNWQFVDVY', first_query_block[:35])
    self.assertEqual('ETSAHEGQTEAPSIDEKVDLHFIALVHVDGHLYEL', first_query_block[-35:])
    self.assertEqual('DAIEVCKKFMERDPDELRFNAIALSAA', str(hsp.query_all[1].seq))

    # hit side: same layout as the query side
    first_hit_block = str(hsp.hit_all[0].seq)
    self.assertEqual('MESQRWLPLEANPEVTNQFLKQLGLHPNWQCVDVY', first_hit_block[:35])
    self.assertEqual('ETSAHEGQTEAPNIDEKVDLHFIALVHVDGHLYEL', first_hit_block[-35:])
    self.assertEqual('DAIEVCKKFMERDPDELRFNAIALSAA', str(hsp.hit_all[1].seq))
if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity=2)