Files
biopython/Tests/test_Align_chain.py
mdehoon 66ad0a062e Extend the .counts method of an Alignment (#5011)
* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* halfway finished

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* replace precompiler #defines by inline functions

* update

* update

* add tests

* update

* update

* update

* update

* update

* update

* update

* update

* documentation

* update

* avoid stpcpy

* pointer printing

* compiler warning

* testing without codonalign

* testing without codonalign and pairwisealigner

* compiler warning

* adding codonalign and pairwisealigner back in

* remove inline from check_indices

* add inline to check_indices

* update

* add test line 12287 test_pairwise_aligner.py

* update

* update

* update

* update

* change dtype from int32 to "i"

* all done

* testing

* testing

* testing

* testing

* done

* done

* fix how pointers are printed on Windows

* update

* update

* update

* update

* fix id printing on pypy

* style change only

* Use Py_uintptr_t instead of uintptr_t

* fix memory leak

* remove double semicolon

* check if GitHub actions are now picking up Python version 3.13.5 without hardcoding it

---------

Co-authored-by: Michiel Jan Laurens de Hoon <mdehoon@Michiels-MacBook-Air.local>
Co-authored-by: Michiel de Hoon <mdehoon@tkx288.genome.gsc.riken.jp>
Co-authored-by: Michiel de Hoon <michiel.dehoon@riken.jp>
2025-07-11 14:40:07 +09:00

7282 lines
284 KiB
Python

# Copyright 2023 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for Align.chain module."""
import unittest
from io import StringIO
from tempfile import NamedTemporaryFile
from Bio import Align
from Bio import SeqIO
from Bio.Align import Alignment
from Bio.Seq import reverse_complement
from Bio.Seq import Seq
from Bio.SeqFeature import CompoundLocation
from Bio.SeqFeature import ExactPosition
from Bio.SeqFeature import SimpleLocation
from Bio.SeqRecord import SeqRecord
try:
import numpy as np
except ImportError:
from Bio import MissingPythonDependencyError
raise MissingPythonDependencyError(
"Install numpy if you want to use Bio.Align.chain."
) from None
class TestAlign_dna_rna(unittest.TestCase):
# The chain file dna_rna.chain was generated from the PSL file using:
# pslToChain dna_rna.psl dna_rna.chain
def setUp(self):
    """Load the DNA and RNA FASTA fixtures used by the tests in this class.

    Every record id in ``Blat/dna.fa`` is split as ``<name>:<start>-<end>``;
    the name must be ``chr3`` and the sequence length must equal
    ``end - start``.  ``self.dna`` maps each integer start to the record's
    sequence as a plain string.  ``self.rna`` maps each record id in
    ``Blat/rna.fa`` to that record's ``seq`` attribute.
    """
    fragments = {}
    for dna_record in SeqIO.parse("Blat/dna.fa", "fasta"):
        # Record ids carry the chromosome name and the coordinate span.
        chromosome, span = dna_record.id.split(":")
        assert chromosome == "chr3"
        lower, upper = span.split("-")
        offset, stop = int(lower), int(upper)
        bases = str(dna_record.seq)
        # The span in the id must agree with the amount of sequence stored.
        assert len(bases) == stop - offset
        fragments[offset] = bases
    self.dna = fragments
    transcripts = {}
    for rna_record in SeqIO.parse("Blat/rna.fa", "fasta"):
        transcripts[rna_record.id] = rna_record.seq
    self.rna = transcripts
def check_alignments(self, alignments):
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 1711))
self.assertEqual(len(alignment), 2)
self.assertEqual(alignment.score, 176)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "NR_046654.1")
self.assertEqual(len(alignment.target.seq), 198295559)
self.assertEqual(len(alignment.query.seq), 181)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42530895, 42530958, 42532020, 42532095, 42532563, 42532606],
[ 181, 118, 118, 43, 43, 0]])
# fmt: on
)
)
dna = Seq(self.dna, length=len(alignment.target))
alignment.target.seq = dna
alignment.query.seq = self.rna[alignment.query.id]
self.assertTrue(
np.array_equal(
alignment.substitutions,
# fmt: off
np.array([[36., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 40., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 57., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 42., 0., 0., 0., 0.],
[ 2., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 1., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 3., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0.],
])
# fmt: on
)
)
self.assertEqual(alignment.substitutions.alphabet, "ACGTacgt")
self.assertEqual(
str(alignment),
"""\
chr3 42530895 CGGAAGTACTTCTGGGGGTACATACTCATCGGCTGGGGTATGGTACCAGGGAGGGCTTCC
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NR_046654 181 CGGAAGTACTTCTGGGGGTACATACTCATCGGCTGGGGTATGGTACCAGGGAGGGCTTCC
chr3 42530955 AGGCTGGGGACAGAGGGGGCAAGGCCTGGAGAACTCCCTAGGGGGAGGGTGCCAACCCAG
60 |||---------------------------------------------------------
NR_046654 121 AGG---------------------------------------------------------
chr3 42531015 CTTGCAGTCCTACGTCTTGCTTAGCTGCAGGTCCTGCCTGCAAGGATATCAGCCAAGGGT
120 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531075 CAAGAAAGTCCTCAAAATGTCTGATCCCAGGACAAGTCCCTCAGGTTGCAGCTGCACCTA
180 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531135 GGGCTGACCTGTGGGACAGATTTTGTGAACATCTTTCCATTTCCCTTTAGTTCCCGAAAT
240 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531195 ACAcagggccactgctaatctataaagggcctctgtcacaattagaaagagaatgtccgt
300 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531255 ctaggtagacacagcccttcaggcatacagcttCACCCCCTCAGTGGAGCATCCCTCCGT
360 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531315 GGTGAACAACCTATGCAACCAAAGACAGCAGACTGACAACCCACCCTTTtctctctccct
420 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531375 ccctctccctgcttttctccaaaatctctccctcatgccctctacccctgcttcctgtgc
480 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531435 cctctctgctctttcactctccctGGGCCTGACAGGGGTACCCAGCACATTCACCATGGT
540 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531495 GTGGACCATCGCCAGGATCCATTTTGAGGATTATGGGTGAGCTGCTGCCCCACACACTCC
600 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531555 CCCGGCCGCCATCACTTGGGCAGGCCCCCTGGGTGGGATGATAATGCCATCTGGCCTTGG
660 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531615 TGAGTGGACAAAAACCACAGCTCTCGGGCCAGAGGGGAGGCTGGAGGAGGACCTGGGGAG
720 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531675 CAACAGACTCTGGGCCCGGGGTTGCTAAAGTGCTCAGGAGCAGAGCTGGGGACAACTGGG
780 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531735 GGAGGTGCTGCTGAGTCTCTCTCTGGCTGAGGACAATCCCTCTCATTCCTCCCCACGGTC
840 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531795 TGCTCAGGTGCTGGGACACCATCAACTCCTCACTGTGGTGGATCATAAAGGGCCCCATCC
900 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531855 TCACCTCCATCTTGGTAAGATaccctcccaccacctagagatggggaaacaggcccaaag
960 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531915 ggcaggcaacttagcccaaggtcacatgggaaattagtatctaggtcagaactgaaacgt
1020 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531975 agcttcctaatgcccaatgcaggatcatccccacccctgtcctaccagTTCTTCCTTGAG
1080 ---------------------------------------------...||||||||||||
NR_046654 118 ---------------------------------------------CAGTTCTTCCTTGAG
chr3 42532035 CGTAAGCGGATTGGGAGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGATGAC
1140 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NR_046654 103 CGTAAGCGGATTGGGAGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGATGAC
chr3 42532095 CTGCCCAAAGGGGAAATGCCAGAGGAGAGGTAAGATAGAGAGAGGGGCAGCAGGACCCTG
1200 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532155 GGAAAGAAGACAGGCCAGCAGTCAAGGGGCCTGAACACCTCAGCCTTCCCGCTCTGACTG
1260 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532215 CCCGAACTCGGGTCCCCACCCACTAGGTAAACTTCATCCTGTTTATTTGCATCATCCGAA
1320 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532275 TCCTGCTTCAGAAACTGCGGCCCCCAGATATCAGGAAGAGTGACAGCAGTCCATACTCGT
1380 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532335 GAGTGTGGGCCTAGTGCCTCAGCCCCCAGTACCTCCATCCCCAGTCCTCAAATCATCCCA
1440 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532395 CATCTCCTTGAAGTCCTCCCACCCCAAACATCCAGAGTCACCAAAGAGCCACATTGTTCT
1500 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532455 TTCCCACCTCCACCATGGCCTGGCTcagcccaccaccatcccctgctccagccccaccct
1560 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532515 caCCAGGCTGCACTCAGAGCCCTGCATGCTTCTCCTGCCCACACTCACCTAGCATCCTTC
1620 ------------------------------------------------||||||||||||
NR_046654 43 ------------------------------------------------CTAGCATCCTTC
chr3 42532575 CCAGGTATGCATCTGCTGCCAAGCCAGGgag 42532606
1680 ||||||||||||||||||||||||||||... 1711
NR_046654 31 CCAGGTATGCATCTGCTGCCAAGCCAGGGAG 0
""",
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 176 chr3 198295559 + 42530895 42532606 NR_046654.1 181 - 0 181 1
63 1062 0
75 468 0
43
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (181 aligned letters; 175 identities; 6 mismatches; 1530 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 181:
identities = 175,
mismatches = 6.
gaps = 1530:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 1530:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 1530:
open_internal_deletions = 2,
extend_internal_deletions = 1528;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 1530)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 1530)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 1530)
self.assertEqual(counts.gaps, 1530)
self.assertEqual(counts.aligned, 181)
self.assertEqual(counts.identities, 175)
self.assertEqual(counts.mismatches, 6)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 1714))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "NR_046654.1_modified")
self.assertEqual(len(alignment.target.seq), 198295559)
self.assertEqual(len(alignment.query.seq), 190)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42530895, 42530922, 42530922, 42530958, 42532020,
42532037, 42532039, 42532095, 42532563, 42532606],
[ 185, 158, 155, 119, 119,
102, 102, 46, 46, 3],
])
# fmt: on
)
)
dna = Seq(self.dna, length=len(alignment.target))
alignment.target.seq = dna
alignment.query.seq = self.rna[alignment.query.id]
self.assertTrue(
np.array_equal(
alignment.substitutions,
# fmt: off
np.array([[34., 0., 0., 1., 0., 0., 0., 0.],
[ 0., 40., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 57., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 41., 0., 0., 0., 0.],
[ 2., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 1., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 3., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0.],
]),
# fmt: on
)
)
self.assertEqual(alignment.substitutions.alphabet, "ACGTacgt")
self.assertEqual(
str(alignment),
"""\
chr3 42530895 CGGAAGTACTTCTGGGGGTACATACTC---ATCGGCTGGGGTATGGTACCAGGGAGGGCT
0 |||||||||||||||||||||||||||---||||||||||||||||||||||||||||||
NR_046654 185 CGGAAGTACTTCTGGGGGTACATACTCCCCATCGGCTGGGGTATGGTACCAGGGAGGGCT
chr3 42530952 TCCAGGCTGGGGACAGAGGGGGCAAGGCCTGGAGAACTCCCTAGGGGGAGGGTGCCAACC
60 ||||||------------------------------------------------------
NR_046654 125 TCCAGG------------------------------------------------------
chr3 42531012 CAGCTTGCAGTCCTACGTCTTGCTTAGCTGCAGGTCCTGCCTGCAAGGATATCAGCCAAG
120 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531072 GGTCAAGAAAGTCCTCAAAATGTCTGATCCCAGGACAAGTCCCTCAGGTTGCAGCTGCAC
180 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531132 CTAGGGCTGACCTGTGGGACAGATTTTGTGAACATCTTTCCATTTCCCTTTAGTTCCCGA
240 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531192 AATACAcagggccactgctaatctataaagggcctctgtcacaattagaaagagaatgtc
300 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531252 cgtctaggtagacacagcccttcaggcatacagcttCACCCCCTCAGTGGAGCATCCCTC
360 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531312 CGTGGTGAACAACCTATGCAACCAAAGACAGCAGACTGACAACCCACCCTTTtctctctc
420 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531372 cctccctctccctgcttttctccaaaatctctccctcatgccctctacccctgcttcctg
480 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531432 tgccctctctgctctttcactctccctGGGCCTGACAGGGGTACCCAGCACATTCACCAT
540 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531492 GGTGTGGACCATCGCCAGGATCCATTTTGAGGATTATGGGTGAGCTGCTGCCCCACACAC
600 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531552 TCCCCCGGCCGCCATCACTTGGGCAGGCCCCCTGGGTGGGATGATAATGCCATCTGGCCT
660 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531612 TGGTGAGTGGACAAAAACCACAGCTCTCGGGCCAGAGGGGAGGCTGGAGGAGGACCTGGG
720 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531672 GAGCAACAGACTCTGGGCCCGGGGTTGCTAAAGTGCTCAGGAGCAGAGCTGGGGACAACT
780 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531732 GGGGGAGGTGCTGCTGAGTCTCTCTCTGGCTGAGGACAATCCCTCTCATTCCTCCCCACG
840 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531792 GTCTGCTCAGGTGCTGGGACACCATCAACTCCTCACTGTGGTGGATCATAAAGGGCCCCA
900 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531852 TCCTCACCTCCATCTTGGTAAGATaccctcccaccacctagagatggggaaacaggccca
960 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531912 aagggcaggcaacttagcccaaggtcacatgggaaattagtatctaggtcagaactgaaa
1020 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531972 cgtagcttcctaatgcccaatgcaggatcatccccacccctgtcctaccagTTCTTCCTT
1080 ------------------------------------------------...|||||||||
NR_046654 119 ------------------------------------------------CAGTTCTTCCTT
chr3 42532032 GAGCGTAAGCGGATTGGGAGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGAT
1140 |||||--|||||||||||.|||||||||||||||||||||||||||||||||||||||||
NR_046654 107 GAGCG--AGCGGATTGGGTGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGAT
chr3 42532092 GACCTGCCCAAAGGGGAAATGCCAGAGGAGAGGTAAGATAGAGAGAGGGGCAGCAGGACC
1200 |||---------------------------------------------------------
NR_046654 49 GAC---------------------------------------------------------
chr3 42532152 CTGGGAAAGAAGACAGGCCAGCAGTCAAGGGGCCTGAACACCTCAGCCTTCCCGCTCTGA
1260 ------------------------------------------------------------
NR_046654 46 ------------------------------------------------------------
chr3 42532212 CTGCCCGAACTCGGGTCCCCACCCACTAGGTAAACTTCATCCTGTTTATTTGCATCATCC
1320 ------------------------------------------------------------
NR_046654 46 ------------------------------------------------------------
chr3 42532272 GAATCCTGCTTCAGAAACTGCGGCCCCCAGATATCAGGAAGAGTGACAGCAGTCCATACT
1380 ------------------------------------------------------------
NR_046654 46 ------------------------------------------------------------
chr3 42532332 CGTGAGTGTGGGCCTAGTGCCTCAGCCCCCAGTACCTCCATCCCCAGTCCTCAAATCATC
1440 ------------------------------------------------------------
NR_046654 46 ------------------------------------------------------------
chr3 42532392 CCACATCTCCTTGAAGTCCTCCCACCCCAAACATCCAGAGTCACCAAAGAGCCACATTGT
1500 ------------------------------------------------------------
NR_046654 46 ------------------------------------------------------------
chr3 42532452 TCTTTCCCACCTCCACCATGGCCTGGCTcagcccaccaccatcccctgctccagccccac
1560 ------------------------------------------------------------
NR_046654 46 ------------------------------------------------------------
chr3 42532512 cctcaCCAGGCTGCACTCAGAGCCCTGCATGCTTCTCCTGCCCACACTCACCTAGCATCC
1620 ---------------------------------------------------|||||||||
NR_046654 46 ---------------------------------------------------CTAGCATCC
chr3 42532572 TTCCCAGGTATGCATCTGCTGCCAAGCCAGGgag 42532606
1680 |||||||||||||||||||||||||||||||... 1714
NR_046654 37 TTCCCAGGTATGCATCTGCTGCCAAGCCAGGGAG 3
""",
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 170 chr3 198295559 + 42530895 42532606 NR_046654.1_modified 190 - 5 187 2
27 0 3
36 1062 0
17 2 0
56 468 0
43
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (179 aligned letters; 172 identities; 7 mismatches; 1535 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 179:
identities = 172,
mismatches = 7.
gaps = 1535:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 1535:
internal_insertions = 3:
open_internal_insertions = 1,
extend_internal_insertions = 2;
internal_deletions = 1532:
open_internal_deletions = 3,
extend_internal_deletions = 1529;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 3)
self.assertEqual(counts.internal_deletions, 1532)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 1535)
self.assertEqual(counts.insertions, 3)
self.assertEqual(counts.deletions, 1532)
self.assertEqual(counts.gaps, 1535)
self.assertEqual(counts.aligned, 179)
self.assertEqual(counts.identities, 172)
self.assertEqual(counts.mismatches, 7)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 5407))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "NR_111921.1")
self.assertEqual(len(alignment.target.seq), 198295559)
self.assertEqual(len(alignment.query.seq), 216)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array( [[48663767, 48663813, 48665640, 48665722, 48669098, 48669174],
[ 0, 46, 46, 128, 128, 204]]),
# fmt: on
)
)
dna = Seq(self.dna, length=len(alignment.target.seq))
alignment.target.seq = dna
alignment.query.seq = self.rna[alignment.query.id]
self.assertTrue(
np.array_equal(
alignment.substitutions,
# fmt: off
np.array([[53., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 35., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 50., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 27., 0., 0., 0., 0.],
[ 9., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 7., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 16., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 7., 0., 0., 0., 0.],
])
# fmt: on
)
)
self.assertEqual(alignment.substitutions.alphabet, "ACGTacgt")
self.assertEqual(
str(alignment),
"""\
chr3 48663767 CACGAGAGGAGCGGAGGCGAGGGGTGAACGCGGAGCACTCCAATCGGTCAGTCATTGTTT
0 ||||||||||||||||||||||||||||||||||||||||||||||--------------
NR_111921 0 CACGAGAGGAGCGGAGGCGAGGGGTGAACGCGGAGCACTCCAATCG--------------
chr3 48663827 CTATTGGCACAATGGGAGGCCCCGCCCCTCACGGCGGACTCATCGCATGGGGGAGGGGGC
60 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48663887 TCCGCGGGTTGCCGGCTAACCGTGAGAGAGTCCGGGAGGTACACTATACGGACCGGCCTC
120 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48663947 CAAAGGCGGAATCGATAACGAGCTGCAGCGCCGGGTGCAGAGGACGCGGGCATCCCGAAG
180 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664007 CCCAGGAAGAGGTCAGGGCCGGGACCCCAGAACGCTCCACAGGGTGCGGCTCCCGCGATG
240 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664067 GGGTGGATCCTGGTTCTAACAGGCGAGGAACTCCTGGCCAAGGCCTCTGGCCCGCCCCGA
300 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664127 ACGGTCCCTATGACATCACCATCAACCAATCAGTCGGCGCATCCTTTCGCCCCTTGACTG
360 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664187 CTCCGCTTCCGGGAGGCGGGGCTTCTGCGGGTTCCACCTCCCGAGCGCCCCTTGTGGCTA
420 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664247 CCAAGGTCAGGCAACAGGTGTCCAGTTGTTCCCTCTCCTGTCTACGAATCTGAGGACCTC
480 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664307 CCCAGGATCAGAGCTCTGGGCCTGATACACGGCCGGGGTTCCTACGGGTTTGTGAGTGGG
540 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664367 GGTGGAAGATCTGCAGAGGCACTTAGGGCTGAACTCCTTTGAATGGGAGCCAATCGGTGC
600 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664427 AGGGGCTGGAGGAGCGAGTCCCCCAAAGTAGttttatttatctatttagagacaaggtct
660 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664487 cactctttcggagtgcagtggtgatcacagctcaccgtagcctcgaactccccaggcgat
720 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664547 tctctcacctcagcctcccgagtagctgggactacgggtacatgtcatcacacttggcta
780 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664607 atttttgcattttttatagagacagggtctcaccatgtaggccagattagtcttgaactc
840 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664667 ctgggctcaagcaatccgcccatcttggcctcccaaagtgctgggattataggtgtgagc
900 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664727 caccgcgcccggcAACCCAGAAGTGGTTTTGACAGCAccagcgctttctgtgtccacaat
960 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664787 ctagtgagtagagggcacaaaacctgacaccacggaggcagacaggcaggggctctgccg
1020 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664847 gggaagggtgttggagtcccaaaggaggcgtctgagtcaccttcgcaacctgggacgcct
1080 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664907 tcttgcataagatgcctgagcagtgccttgaatgaccaaggggagatccgcatctgcaaa
1140 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664967 ggaagggcagggagggatagggattgggggtgggcatcctaggtcttggagactgtgtgg
1200 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665027 gcaaatgtgcagagacataaagggactatggctgagggaaatcaagCCCTGCCCTCTCAC
1260 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665087 CAATAGGGCTGGCGCTGGTCCCAGCTAACACTCCTTTTGGAGAGCAAAGCTCCTCACTTC
1320 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665147 TGAGTAGTGAGATTGATTGCGGATCACTCTCCATGTTGCTGCCTGCTGTGTGTCATCCCA
1380 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665207 CTGTCATCCTCCCTTTGTGGCTGTTCTGTGGAGCCCCTCTCCCTCAATCTGCACTCACCT
1440 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665267 CTATGCCCCAGCCCCATTGGCAGCTCCTAATGCACTCCCGGTaaaaaaaaaaaaacaaaa
1500 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665327 aCCAGATGTTAGTGATAGTGGTGGTAGTTCTTCTCTCCACCTCCAAATCTTGCCCTTGCC
1560 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665387 TCCTAATAAGACCCCTATGTGGTTTAACCTCAttttttttttttttttttttttttgaga
1620 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665447 tggagtttcactctgtcacccaggctggagtgaagtggtgtgatGGGGCTTCACCATGTg
1680 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665507 atggggcttcaccatgttggccaggctggtatcaaactcctgacctctagtgatctgccc
1740 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665567 gcctcagcctcccaaagtgctgggattaccggcatgaggcaccgtgcccagccTATCCTC
1800 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665627 CTTCTCTTATCAGCTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGC
1860 -------------|||||||||||||||||||||||||||||||||||||||||||||||
NR_111921 46 -------------CTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGC
chr3 48665687 TGCTGGGCGGCAGATGGAGCGATCAGAAGACCAGGGTAAGGGTGTGGCAGATACTGCCAC
1920 |||||||||||||||||||||||||||||||||||-------------------------
NR_111921 93 TGCTGGGCGGCAGATGGAGCGATCAGAAGACCAGG-------------------------
chr3 48665747 TAACACTTCTCAGCCTTTCCTTCTCCTGCCTTTTCCACCCCACCCTGTGTTTGTCTACTC
1980 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48665807 CCAGCCAGGTGTACCTTTCCAGGGGAAGACCTGGCCAACCTGTCCAGCTCAATTAtccag
2040 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48665867 cagttctttgacctcactgagatctcgagtccattgttcatcacctcagctattgacctg
2100 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48665927 tgtcattagccttatagagttcagtgccacggaaactccctgccctgttctttttctttt
2160 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48665987 tctttttttttttttttttgagacagagccttgctctgtcgcccaggctggagtgcagtg
2220 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666047 gcgcgatctcggctcactgcaagctctgcctcccaggttcacaccattctcctgactcag
2280 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666107 cctcccgagtagctgggactacaggcgtccaccaccatgcccagctaatttttttttttg
2340 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666167 tatttttagtagagacggcgtttcaccgtgttagccaggctggtctcgatctcctgacct
2400 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666227 tgtgatgctcccgcctcggcctcccaaagtgctgggattacaggcatgagccattgtgcc
2460 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666287 cggcctgccctgttcttcttagacaaacttgctgggctaaaatctaaccccgttaaaata
2520 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666347 gactatttacgtattgtttgcctctagcgcagcagaacattgctggagaaaaacaaacaa
2580 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666407 ccgtgctaattggtctcattttatattcatgaccacaagcctcagtattatatcggaggg
2640 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666467 cctatccagtgcagtagggcaagaaaaataataagttatgaagattggaagggaaaaaaa
2700 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666527 actaattcacaagcagtaggattgtatatgtaaaaatttcaaaggaacctataggtaagt
2760 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666587 tgttagaatgagttcagcaaagttgttggacacaagatcaatatataaaaatcagttgca
2820 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666647 atttctatatgtcaccaacagttagaaaataaatttcttgcctgggcatgttggctcaag
2880 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666707 cctgtaatcccagcactttgggtggccaaggcgggcagatcacctgaggtcaggagtttg
2940 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666767 agaccagtttggccaacatggtgaaatcccgtctctactaaaaatacagaaattagccgg
3000 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666827 gcgtggtggtgggcacctgtagtcccagctactgaggaggctgaggcaggagaatcactt
3060 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666887 gaacctgggaggcagaggttgcagtgaacgagaaaaaaaaattttttttcttaaaaacaa
3120 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666947 tgatgtttacaatagcatcaagtaatatcaaatgctgaggaataaacctaatgaaagatg
3180 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667007 tgcaaagactacatacacacatacaaaaaaactataaaacattattgagggaaataaaga
3240 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667067 cataggcctggcattggtggctcatgcctgaaatctcagcactttggagggccaaggtgg
3300 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667127 gtggatcatttgaggtcaggagttagagatcagtccggccaacatggtgaaacctcatct
3360 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667187 ctactaaaaatacaaaaaaattagcttggccaggtgcagtggctcacacctgtaatccca
3420 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667247 gcactttgggaggctgaggcgggcggatcatgaggtcaggagatcgagaccatcctggct
3480 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667307 aacacggtgaaaccctgtctctactaaaaatacaaaaaaaaattagccgggcctgatggc
3540 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667367 gggcgcccgtagtcccagctactcgggaggctgaggtagcagaatggcgtgaacctggga
3600 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667427 ggtgcagcttgcagtgagcctaaattgcgccactgcactccagcctgggtaacagagcga
3660 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667487 gactccgtttcaaaaaaaaaaaaaaaaattagctgggcatgctgttgtgcacctgcaatc
3720 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667547 ccagctactctggaggatgaggcagaagtgcctgaacctgggacacagaggttgcagtga
3780 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667607 gccaagatcatgccattgcactccagcctggacaacacagccagacgctatctgaaaaaa
3840 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667667 aaaaaaaaaaaaaaagtaaaaaaaatgagaaataaagacataaataaagtgaaaaattgt
3900 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667727 tccaatattggaaaagtcaatattataaaggtgccaattttcccaaattgatatatggat
3960 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667787 tcgatgcaacttcagttaaaaatcccactaaattttggctgggtgcggtggctcacacct
4020 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667847 gtaatcccagcactttgggaggctgaggcgggcggatcacaaggtcaggagatcgagacc
4080 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667907 atcttggctaacatggtgaaaccgtctctactaaaaatacaaaagttagccgggtgtggt
4140 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667967 ggcgggcacctgtagtcccagctacttgggaggctgagacagaatggcgtgaacctgggg
4200 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668027 aggcggagcttgcagtgagccaagttgacgccactgcactccagcctgggcgacagagca
4260 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668087 agactctgtctcaaaaaaaaaaaaaaaaaaaTCCCACTAGATTTTGTGTGTGTGTAAACT
4320 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668147 GACAAACTAGATTTAGcagcctgagcaacacagcaaaaccccatctctacaaaaaataca
4380 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668207 aaaattttgcacatgcctgtatagtcccagctacttgggaggctgaagtgggaggatcat
4440 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668267 gtgagctctggggaggtcgaggctgtagtgagctatgatcacatgctgcactctagcctg
4500 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668327 ggcaacagagcaagagaccctgtatctaaaaaaagaatgaaaattaaaaaataaaaaGAa
4560 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668387 accaagattgtgtggtactggtacgaggataggaagactaaaggaacgaaatccagagac
4620 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668447 aggcctgaagatgtgtggaaacttgaattttgacaagggtgGTTCTTCAGAGCTAACATG
4680 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668507 AAGAAAGGGTTGTTTTCTTTTTTTTGTTTCCCcaggagcaactctattaactgaaagaat
4740 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668567 aggcttttcaataaatgatgctgggtcagttggatatccatatagaaaaaattaaatgag
4800 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668627 atctctatttcacactgcttgcataatcaattccatataaatttgacatctgaaaatata
4860 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668687 cagtttctagaaaacagtatTAAGACCttgttttgttttttgttgttgttgttttttgtt
4920 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668747 ttgttttttgttttttgagacagagtctcgctctgtcgccaggctggaatacagtggtgc
4980 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668807 aaccttggctcactgcaacctctgactccctagttcaagcaattctcctgcctcagcctc
5040 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668867 ccgagtagctgcgattacaggcacatgccaccacgcccagctaatttttgtatttttagt
5100 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668927 agagatgggggtttcaccatgttggccaggatggtctcgatctcctgaccctgtaatccg
5160 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668987 cccacctcggcctcccaaagtgctgggattacaggcgtgagccactgcacctggccAAGA
5220 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48669047 GAAGATCTTAAAGGTGACTTTAAGCAAACttttttttttttttttttacagagacgggag
5280 ---------------------------------------------------.........
NR_111921 128 ---------------------------------------------------AGACGGGAG
chr3 48669107 ctggagtgcagtggctgttcacaagcgtgaAAGCAAAGATTAAAAAATTTGTTTTTATAT
5340 ..............................||||||||||||||||||||||||||||||
NR_111921 137 CTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTATAT
chr3 48669167 TAAAAAA 48669174
5400 ||||||| 5407
NR_111921 197 TAAAAAA 204
""",
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 182 chr3 198295559 + 48663767 48669174 NR_111921.1 216 + 0 204 3
46 1827 0
82 3376 0
76
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (204 aligned letters; 165 identities; 39 mismatches; 5203 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 204:
identities = 165,
mismatches = 39.
gaps = 5203:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 5203:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 5203:
open_internal_deletions = 2,
extend_internal_deletions = 5201;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 5203)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 5203)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 5203)
self.assertEqual(counts.gaps, 5203)
self.assertEqual(counts.aligned, 204)
self.assertEqual(counts.identities, 165)
self.assertEqual(counts.mismatches, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 5409))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "NR_111921.1_modified")
self.assertEqual(len(alignment.target.seq), 198295559)
self.assertEqual(len(alignment.query.seq), 220)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48663767, 48663795, 48663796, 48663813, 48665640,
48665716, 48665716, 48665722, 48669098, 48669174],
[ 3, 31, 31, 48, 48,
124, 126, 132, 132, 208]
])
# fmt: on
)
)
dna = Seq(self.dna, length=len(alignment.target))
alignment.target.seq = dna
alignment.query.seq = self.rna[alignment.query.id]
self.assertTrue(
np.array_equal(
alignment.substitutions,
# fmt: off
np.array([[53., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 34., 0., 0., 0., 0., 0., 0.],
[ 0., 2., 48., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 27., 0., 0., 0., 0.],
[ 9., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 7., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 16., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 7., 0., 0., 0., 0.],
]),
# fmt: on
)
)
self.assertEqual(alignment.substitutions.alphabet, "ACGTacgt")
self.assertEqual(
str(alignment),
"""\
chr3 48663767 CACGAGAGGAGCGGAGGCGAGGGGTGAACGCGGAGCACTCCAATCGGTCAGTCATTGTTT
0 ||||||||||||||||||||||||||||-|||||||||||||||||--------------
NR_111921 3 CACGAGAGGAGCGGAGGCGAGGGGTGAA-GCGGAGCACTCCAATCG--------------
chr3 48663827 CTATTGGCACAATGGGAGGCCCCGCCCCTCACGGCGGACTCATCGCATGGGGGAGGGGGC
60 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48663887 TCCGCGGGTTGCCGGCTAACCGTGAGAGAGTCCGGGAGGTACACTATACGGACCGGCCTC
120 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48663947 CAAAGGCGGAATCGATAACGAGCTGCAGCGCCGGGTGCAGAGGACGCGGGCATCCCGAAG
180 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664007 CCCAGGAAGAGGTCAGGGCCGGGACCCCAGAACGCTCCACAGGGTGCGGCTCCCGCGATG
240 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664067 GGGTGGATCCTGGTTCTAACAGGCGAGGAACTCCTGGCCAAGGCCTCTGGCCCGCCCCGA
300 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664127 ACGGTCCCTATGACATCACCATCAACCAATCAGTCGGCGCATCCTTTCGCCCCTTGACTG
360 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664187 CTCCGCTTCCGGGAGGCGGGGCTTCTGCGGGTTCCACCTCCCGAGCGCCCCTTGTGGCTA
420 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664247 CCAAGGTCAGGCAACAGGTGTCCAGTTGTTCCCTCTCCTGTCTACGAATCTGAGGACCTC
480 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664307 CCCAGGATCAGAGCTCTGGGCCTGATACACGGCCGGGGTTCCTACGGGTTTGTGAGTGGG
540 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664367 GGTGGAAGATCTGCAGAGGCACTTAGGGCTGAACTCCTTTGAATGGGAGCCAATCGGTGC
600 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664427 AGGGGCTGGAGGAGCGAGTCCCCCAAAGTAGttttatttatctatttagagacaaggtct
660 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664487 cactctttcggagtgcagtggtgatcacagctcaccgtagcctcgaactccccaggcgat
720 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664547 tctctcacctcagcctcccgagtagctgggactacgggtacatgtcatcacacttggcta
780 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664607 atttttgcattttttatagagacagggtctcaccatgtaggccagattagtcttgaactc
840 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664667 ctgggctcaagcaatccgcccatcttggcctcccaaagtgctgggattataggtgtgagc
900 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664727 caccgcgcccggcAACCCAGAAGTGGTTTTGACAGCAccagcgctttctgtgtccacaat
960 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664787 ctagtgagtagagggcacaaaacctgacaccacggaggcagacaggcaggggctctgccg
1020 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664847 gggaagggtgttggagtcccaaaggaggcgtctgagtcaccttcgcaacctgggacgcct
1080 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664907 tcttgcataagatgcctgagcagtgccttgaatgaccaaggggagatccgcatctgcaaa
1140 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664967 ggaagggcagggagggatagggattgggggtgggcatcctaggtcttggagactgtgtgg
1200 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665027 gcaaatgtgcagagacataaagggactatggctgagggaaatcaagCCCTGCCCTCTCAC
1260 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665087 CAATAGGGCTGGCGCTGGTCCCAGCTAACACTCCTTTTGGAGAGCAAAGCTCCTCACTTC
1320 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665147 TGAGTAGTGAGATTGATTGCGGATCACTCTCCATGTTGCTGCCTGCTGTGTGTCATCCCA
1380 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665207 CTGTCATCCTCCCTTTGTGGCTGTTCTGTGGAGCCCCTCTCCCTCAATCTGCACTCACCT
1440 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665267 CTATGCCCCAGCCCCATTGGCAGCTCCTAATGCACTCCCGGTaaaaaaaaaaaaacaaaa
1500 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665327 aCCAGATGTTAGTGATAGTGGTGGTAGTTCTTCTCTCCACCTCCAAATCTTGCCCTTGCC
1560 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665387 TCCTAATAAGACCCCTATGTGGTTTAACCTCAttttttttttttttttttttttttgaga
1620 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665447 tggagtttcactctgtcacccaggctggagtgaagtggtgtgatGGGGCTTCACCATGTg
1680 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665507 atggggcttcaccatgttggccaggctggtatcaaactcctgacctctagtgatctgccc
1740 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665567 gcctcagcctcccaaagtgctgggattaccggcatgaggcaccgtgcccagccTATCCTC
1800 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665627 CTTCTCTTATCAGCTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGC
1860 -------------|||||||||||||||||||||||||||||||||||||||||||||||
NR_111921 48 -------------CTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGC
chr3 48665687 TGCTGGGCGGCAGATGGAGCGATCAGAAG--ACCAGGGTAAGGGTGTGGCAGATACTGCC
1920 |||||..||||||||||||||||||||||--||||||-----------------------
NR_111921 95 TGCTGCCCGGCAGATGGAGCGATCAGAAGCCACCAGG-----------------------
chr3 48665745 ACTAACACTTCTCAGCCTTTCCTTCTCCTGCCTTTTCCACCCCACCCTGTGTTTGTCTAC
1980 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48665805 TCCCAGCCAGGTGTACCTTTCCAGGGGAAGACCTGGCCAACCTGTCCAGCTCAATTAtcc
2040 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48665865 agcagttctttgacctcactgagatctcgagtccattgttcatcacctcagctattgacc
2100 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48665925 tgtgtcattagccttatagagttcagtgccacggaaactccctgccctgttctttttctt
2160 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48665985 tttctttttttttttttttttgagacagagccttgctctgtcgcccaggctggagtgcag
2220 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666045 tggcgcgatctcggctcactgcaagctctgcctcccaggttcacaccattctcctgactc
2280 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666105 agcctcccgagtagctgggactacaggcgtccaccaccatgcccagctaatttttttttt
2340 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666165 tgtatttttagtagagacggcgtttcaccgtgttagccaggctggtctcgatctcctgac
2400 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666225 cttgtgatgctcccgcctcggcctcccaaagtgctgggattacaggcatgagccattgtg
2460 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666285 cccggcctgccctgttcttcttagacaaacttgctgggctaaaatctaaccccgttaaaa
2520 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666345 tagactatttacgtattgtttgcctctagcgcagcagaacattgctggagaaaaacaaac
2580 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666405 aaccgtgctaattggtctcattttatattcatgaccacaagcctcagtattatatcggag
2640 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666465 ggcctatccagtgcagtagggcaagaaaaataataagttatgaagattggaagggaaaaa
2700 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666525 aaactaattcacaagcagtaggattgtatatgtaaaaatttcaaaggaacctataggtaa
2760 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666585 gttgttagaatgagttcagcaaagttgttggacacaagatcaatatataaaaatcagttg
2820 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666645 caatttctatatgtcaccaacagttagaaaataaatttcttgcctgggcatgttggctca
2880 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666705 agcctgtaatcccagcactttgggtggccaaggcgggcagatcacctgaggtcaggagtt
2940 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666765 tgagaccagtttggccaacatggtgaaatcccgtctctactaaaaatacagaaattagcc
3000 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666825 gggcgtggtggtgggcacctgtagtcccagctactgaggaggctgaggcaggagaatcac
3060 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666885 ttgaacctgggaggcagaggttgcagtgaacgagaaaaaaaaattttttttcttaaaaac
3120 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666945 aatgatgtttacaatagcatcaagtaatatcaaatgctgaggaataaacctaatgaaaga
3180 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667005 tgtgcaaagactacatacacacatacaaaaaaactataaaacattattgagggaaataaa
3240 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667065 gacataggcctggcattggtggctcatgcctgaaatctcagcactttggagggccaaggt
3300 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667125 gggtggatcatttgaggtcaggagttagagatcagtccggccaacatggtgaaacctcat
3360 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667185 ctctactaaaaatacaaaaaaattagcttggccaggtgcagtggctcacacctgtaatcc
3420 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667245 cagcactttgggaggctgaggcgggcggatcatgaggtcaggagatcgagaccatcctgg
3480 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667305 ctaacacggtgaaaccctgtctctactaaaaatacaaaaaaaaattagccgggcctgatg
3540 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667365 gcgggcgcccgtagtcccagctactcgggaggctgaggtagcagaatggcgtgaacctgg
3600 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667425 gaggtgcagcttgcagtgagcctaaattgcgccactgcactccagcctgggtaacagagc
3660 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667485 gagactccgtttcaaaaaaaaaaaaaaaaattagctgggcatgctgttgtgcacctgcaa
3720 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667545 tcccagctactctggaggatgaggcagaagtgcctgaacctgggacacagaggttgcagt
3780 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667605 gagccaagatcatgccattgcactccagcctggacaacacagccagacgctatctgaaaa
3840 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667665 aaaaaaaaaaaaaaaaagtaaaaaaaatgagaaataaagacataaataaagtgaaaaatt
3900 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667725 gttccaatattggaaaagtcaatattataaaggtgccaattttcccaaattgatatatgg
3960 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667785 attcgatgcaacttcagttaaaaatcccactaaattttggctgggtgcggtggctcacac
4020 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667845 ctgtaatcccagcactttgggaggctgaggcgggcggatcacaaggtcaggagatcgaga
4080 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667905 ccatcttggctaacatggtgaaaccgtctctactaaaaatacaaaagttagccgggtgtg
4140 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667965 gtggcgggcacctgtagtcccagctacttgggaggctgagacagaatggcgtgaacctgg
4200 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668025 ggaggcggagcttgcagtgagccaagttgacgccactgcactccagcctgggcgacagag
4260 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668085 caagactctgtctcaaaaaaaaaaaaaaaaaaaTCCCACTAGATTTTGTGTGTGTGTAAA
4320 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668145 CTGACAAACTAGATTTAGcagcctgagcaacacagcaaaaccccatctctacaaaaaata
4380 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668205 caaaaattttgcacatgcctgtatagtcccagctacttgggaggctgaagtgggaggatc
4440 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668265 atgtgagctctggggaggtcgaggctgtagtgagctatgatcacatgctgcactctagcc
4500 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668325 tgggcaacagagcaagagaccctgtatctaaaaaaagaatgaaaattaaaaaataaaaaG
4560 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668385 Aaaccaagattgtgtggtactggtacgaggataggaagactaaaggaacgaaatccagag
4620 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668445 acaggcctgaagatgtgtggaaacttgaattttgacaagggtgGTTCTTCAGAGCTAACA
4680 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668505 TGAAGAAAGGGTTGTTTTCTTTTTTTTGTTTCCCcaggagcaactctattaactgaaaga
4740 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668565 ataggcttttcaataaatgatgctgggtcagttggatatccatatagaaaaaattaaatg
4800 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668625 agatctctatttcacactgcttgcataatcaattccatataaatttgacatctgaaaata
4860 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668685 tacagtttctagaaaacagtatTAAGACCttgttttgttttttgttgttgttgttttttg
4920 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668745 ttttgttttttgttttttgagacagagtctcgctctgtcgccaggctggaatacagtggt
4980 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668805 gcaaccttggctcactgcaacctctgactccctagttcaagcaattctcctgcctcagcc
5040 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668865 tcccgagtagctgcgattacaggcacatgccaccacgcccagctaatttttgtattttta
5100 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668925 gtagagatgggggtttcaccatgttggccaggatggtctcgatctcctgaccctgtaatc
5160 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668985 cgcccacctcggcctcccaaagtgctgggattacaggcgtgagccactgcacctggccAA
5220 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48669045 GAGAAGATCTTAAAGGTGACTTTAAGCAAACttttttttttttttttttacagagacggg
5280 -----------------------------------------------------.......
NR_111921 132 -----------------------------------------------------AGACGGG
chr3 48669105 agctggagtgcagtggctgttcacaagcgtgaAAGCAAAGATTAAAAAATTTGTTTTTAT
5340 ................................||||||||||||||||||||||||||||
NR_111921 139 AGCTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTAT
chr3 48669165 ATTAAAAAA 48669174
5400 ||||||||| 5409
NR_111921 199 ATTAAAAAA 208
""",
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 175 chr3 198295559 + 48663767 48669174 NR_111921.1_modified 220 + 3 208 4
28 1 0
17 1827 0
76 0 2
6 3376 0
76
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (203 aligned letters; 162 identities; 41 mismatches; 5206 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 203:
identities = 162,
mismatches = 41.
gaps = 5206:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 5206:
internal_insertions = 2:
open_internal_insertions = 1,
extend_internal_insertions = 1;
internal_deletions = 5204:
open_internal_deletions = 3,
extend_internal_deletions = 5201;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 2)
self.assertEqual(counts.internal_deletions, 5204)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 5206)
self.assertEqual(counts.insertions, 2)
self.assertEqual(counts.deletions, 5204)
self.assertEqual(counts.gaps, 5206)
self.assertEqual(counts.aligned, 203)
self.assertEqual(counts.identities, 162)
self.assertEqual(counts.mismatches, 41)
self.assertRaises(StopIteration, next, alignments)
def test_reading(self):
    """Test parsing dna_rna.chain.

    The same checks (self.check_alignments) are run on the file parsed
    in several ways: from a path, through iter() on the parser, inside a
    ``with`` block, and from an already-open temporary file object.
    """
    path = "Blat/dna_rna.chain"
    # Parse straight from the path and check the parser object itself.
    alignments = Align.parse(path, "chain")
    self.check_alignments(alignments)
    # Run the same checks on the result of calling iter() on that parser.
    alignments = iter(alignments)
    self.check_alignments(alignments)
    # Use the parser as a context manager, consuming all alignments.
    with Align.parse(path, "chain") as alignments:
        self.check_alignments(alignments)
    # Once the with block has exited, accessing the private _stream
    # attribute is expected to raise AttributeError.
    with self.assertRaises(AttributeError):
        alignments._stream
    # Same expectation when the with block exits without reading anything.
    with Align.parse(path, "chain") as alignments:
        pass
    with self.assertRaises(AttributeError):
        alignments._stream
    with open(path) as stream:
        data = stream.read()
    # Parse from an open file object instead of a path. The with statement
    # guarantees that the temporary file is closed even if one of the
    # checks fails, instead of leaving it open until garbage collection.
    with NamedTemporaryFile("w+t") as stream:
        stream.write(data)
        # Rewind so that parsing starts at the beginning of the copy.
        stream.seek(0)
        alignments = Align.parse(stream, "chain")
        self.check_alignments(alignments)
def test_writing(self):
    """Test writing the alignments in dna_rna.chain.

    The alignments parsed from the file are written to an in-memory
    buffer in chain format, and the buffer is then parsed again and run
    through the same checks as the original file.
    """
    parsed = Align.parse("Blat/dna_rna.chain", "chain")
    buffer = StringIO()
    # The value returned by Align.write is expected to be 4.
    written = Align.write(parsed, buffer, "chain")
    self.assertEqual(written, 4)
    # Rewind the buffer so the text just written can be read back.
    buffer.seek(0)
    self.check_alignments(Align.parse(buffer, "chain"))
class TestAlign_dna(unittest.TestCase):
# Sequences from Blat/fasta_34.fa as plain strings, keyed by record id.
queries = {
    entry.id: str(entry.seq)
    for entry in SeqIO.parse("Blat/fasta_34.fa", "fasta")
}
def test_reading_psl_34_001(self):
    """Test parsing psl_34_001.chain.

    The parsed alignments are handed to check_reading_psl_34_001.
    """
    # psl_34_001.chain was produced from the corresponding PSL file with
    #     pslToChain psl_34_001.psl psl_34_001.chain
    parsed = Align.parse("Blat/psl_34_001.chain", "chain")
    self.check_reading_psl_34_001(parsed)
def check_reading_psl_34_001(self, alignments):
"""Check parsing psl_34_001.chain."""
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 16))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertEqual(len(alignment.target.seq), 191154276)
self.assertEqual(len(alignment.query.seq), 33)
self.assertEqual(
str(alignment),
"""\
chr4 61646095 ???????????????? 61646111
0 |||||||||||||||| 16
hg18_dna 11 ???????????????? 27
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61646095, 61646111],
[ 11, 27]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 16 chr4 191154276 + 61646095 61646111 hg18_dna 33 + 11 27 1
16
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (16 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 16:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 16)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 33))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 33)
self.assertEqual(
str(alignment),
"""\
chr1 10271783 ????????????????????????????????? 10271816
0 ||||||||||||||||||||||||||||||||| 33
hg18_dna 0 ????????????????????????????????? 33
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[10271783, 10271816],
[ 0, 33]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 33 chr1 249250621 + 10271783 10271816 hg18_dna 33 + 0 33 2
33
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (33 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 33:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 33)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 17))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 33)
self.assertEqual(
str(alignment),
"""\
chr2 53575980 ????????????????? 53575997
0 ||||||||||||||||| 17
hg18_dna 25 ????????????????? 8
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[53575980, 53575997],
[ 25, 8]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 17 chr2 243199373 + 53575980 53575997 hg18_dna 33 - 8 25 3
17
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (17 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 17:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 17)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 41))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr9")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 141213431)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr9 85737865 ????????????????????????????????????????? 85737906
0 ||||||||||||||||||||||||||||||||||||||||| 41
hg19_dna 9 ????????????????????????????????????????? 50
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[85737865, 85737906],
[ 9, 50]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 35 chr9 141213431 + 85737865 85737906 hg19_dna 50 + 9 50 4
41
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 41))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr8")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 146364022)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr8 95160479 ????????????????????????????????????????? 95160520
0 ||||||||||||||||||||||||||||||||||||||||| 41
hg19_dna 8 ????????????????????????????????????????? 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[95160479, 95160520],
[ 8, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 41 chr8 146364022 + 95160479 95160520 hg19_dna 50 + 8 49 5
41
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 51304566)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr22 42144400 ???????????????????????????????????? 42144436
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 11 ???????????????????????????????????? 47
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42144400, 42144436],
[ 11, 47]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 30 chr22 51304566 + 42144400 42144436 hg19_dna 50 + 11 47 6
36
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 48))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr2 183925984 ??????----?????????????????????????????????????? 183926028
0 ||||||----|||||||||||||||||||||||||||||||||||||| 48
hg19_dna 1 ???????????????????????????????????????????????? 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[183925984, 183925990, 183925990, 183926028],
[ 1, 7, 11, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 41 chr2 243199373 + 183925984 183926028 hg19_dna 50 + 1 49 7
6 0 4
38
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (44 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 44:
identities = 0,
mismatches = 0.
gaps = 4:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 4:
internal_insertions = 4:
open_internal_insertions = 1,
extend_internal_insertions = 3;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 4)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 4)
self.assertEqual(counts.insertions, 4)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 4)
self.assertEqual(counts.aligned, 44)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 170))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr19 35483340 ????????????????????????????????????????????????????????????
0 |||||||||||||||||||||||||-----------------------------------
hg19_dna 10 ?????????????????????????-----------------------------------
chr19 35483400 ????????????????????????????????????????????????????????????
60 ------------------------------------------------------------
hg19_dna 35 ------------------------------------------------------------
chr19 35483460 ?????????????????????????????????????????????????? 35483510
120 ---------------------------------------||||||||||| 170
hg19_dna 35 ---------------------------------------??????????? 46
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[35483340, 35483365, 35483499, 35483510],
[ 10, 35, 35, 46]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 31 chr19 59128983 + 35483340 35483510 hg19_dna 50 + 10 46 8
25 134 0
11
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 134 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 134:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 134:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 134:
open_internal_deletions = 1,
extend_internal_deletions = 133;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 134)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 134)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 134)
self.assertEqual(counts.gaps, 134)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 78077248)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr18 23891310 ??????????????????????????????????????? 23891349
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 10 ??????????????????????????????????????? 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[23891310, 23891349],
[ 10, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 39 chr18 78077248 + 23891310 23891349 hg19_dna 50 + 10 49 9
39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 28))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 78077248)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr18 43252217 ???????????????????????????? 43252245
0 |||||||||||||||||||||||||||| 28
hg19_dna 21 ???????????????????????????? 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[43252217, 43252245],
[ 21, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 26 chr18 78077248 + 43252217 43252245 hg19_dna 50 + 21 49 10
28
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 54))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr13")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 115169878)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr13 52759147 ?????????????---??????????????????????????????????????
0 |||||||---------||||||||||||||||||||||||||||||||||||||
hg19_dna 1 ???????------?????????????????????????????????????????
chr13 52759198
54
hg19_dna 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[52759147, 52759154, 52759160, 52759160, 52759198],
[ 1, 8, 8, 11, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 41 chr13 115169878 + 52759147 52759198 hg19_dna 50 + 1 49 11
7 6 3
38
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (45 aligned letters; 0 identities; 0 mismatches; 9 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 45:
identities = 0,
mismatches = 0.
gaps = 9:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 9:
internal_insertions = 3:
open_internal_insertions = 1,
extend_internal_insertions = 2;
internal_deletions = 6:
open_internal_deletions = 1,
extend_internal_deletions = 5;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 3)
self.assertEqual(counts.internal_deletions, 6)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 9)
self.assertEqual(counts.insertions, 3)
self.assertEqual(counts.deletions, 6)
self.assertEqual(counts.gaps, 9)
self.assertEqual(counts.aligned, 45)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 50))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr1 1207056 ?????????????????????????????????????????????????? 1207106
0 |||||||||||||||||||||||||||||||||||||||||||||||||| 50
hg19_dna 0 ?????????????????????????????????????????????????? 50
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[1207056, 1207106],
[ 0, 50]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 50 chr1 249250621 + 1207056 1207106 hg19_dna 50 + 0 50 12
50
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (50 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 50:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 50)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 34))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr1 61700837 ?????????????????????????????????? 61700871
0 |||||||||||||||||||||||||||||||||| 34
hg19_dna 1 ?????????????????????????????????? 35
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61700837, 61700871],
[ 1, 35]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 28 chr1 249250621 + 61700837 61700871 hg19_dna 50 + 1 35 13
34
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 44))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 191154276)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr4 37558157 ????????????????----------?????????????????? 37558191
0 ||||||||||----------------|||||||||||||||||| 44
hg19_dna 49 ??????????------???????????????????????????? 11
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[37558157, 37558167, 37558173, 37558173, 37558191],
[ 49, 39, 39, 29, 11]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "psl"),
"""\
28 0 0 0 1 10 1 6 - hg19_dna 50 11 49 chr4 191154276 37558157 37558191 2 10,18, 1,21, 37558157,37558173,
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 16 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 16:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 16:
internal_insertions = 10:
open_internal_insertions = 1,
extend_internal_insertions = 9;
internal_deletions = 6:
open_internal_deletions = 1,
extend_internal_deletions = 5;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 10)
self.assertEqual(counts.internal_deletions, 6)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 16)
self.assertEqual(counts.insertions, 10)
self.assertEqual(counts.deletions, 6)
self.assertEqual(counts.gaps, 16)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 37))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 51304566)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr22 48997405 ????????????????????????????????????? 48997442
0 ||||||||||||||||||||||||||||||||||||| 37
hg19_dna 49 ????????????????????????????????????? 12
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48997405, 48997442],
[ 49, 12]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 33 chr22 51304566 + 48997405 48997442 hg19_dna 50 - 1 38 15
37
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (37 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 37:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 37)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr2 120641740 ???????????????????????????????????? 120641776
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 49 ???????????????????????????????????? 13
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[120641740, 120641776],
[ 49, 13]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 34 chr2 243199373 + 120641740 120641776 hg19_dna 50 - 1 37 16
36
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr19 54017130 ??????????????????????????????????????? 54017169
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 49 ??????????????????????????????????????? 10
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[54017130, 54017169],
[ 49, 10]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 39 chr19 59128983 + 54017130 54017169 hg19_dna 50 - 1 40 17
39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr19 553742 ??????????????????????????????????????? 553781
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 49 ??????????????????????????????????????? 10
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[553742, 553781],
[ 49, 10]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 33 chr19 59128983 + 553742 553781 hg19_dna 50 - 1 40 18
39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 135534747)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr10 99388555 ???????????????????????????????????? 99388591
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 49 ???????????????????????????????????? 13
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[99388555, 99388591],
[ 49, 13]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 30 chr10 135534747 + 99388555 99388591 hg19_dna 50 - 1 37 19
36
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 25))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 135534747)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr10 112178171 ????????????????????????? 112178196
0 ||||||||||||||||||||||||| 25
hg19_dna 35 ????????????????????????? 10
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[112178171, 112178196],
[ 35, 10]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 23 chr10 135534747 + 112178171 112178196 hg19_dna 50 - 15 40 20
25
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (25 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 25:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 25)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr1 39368490 ???????????????????????????????????? 39368526
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 49 ???????????????????????????????????? 13
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[39368490, 39368526],
[ 49, 13]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 34 chr1 249250621 + 39368490 39368526 hg19_dna 50 - 1 37 21
36
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 34))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr1 220325687 ?????????????????????????????????? 220325721
0 |||||||||||||||||||||||||||||||||| 34
hg19_dna 47 ?????????????????????????????????? 13
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[220325687, 220325721],
[ 47, 13]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 32 chr1 249250621 + 220325687 220325721 hg19_dna 50 - 3 37 22
34
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
self.assertRaises(StopIteration, next, alignments)
def test_writing_chain_34_001(self):
    """Test writing the alignments in psl_34_001.chain.

    The alignments parsed from the chain file are written to an in-memory
    stream, parsed back from that stream, and passed through the same
    checks that are applied to the alignments read directly from the file.
    """
    path = "Blat/psl_34_001.chain"
    alignments = Align.parse(path, "chain")
    stream = StringIO()
    n = Align.write(alignments, stream, "chain")
    # The file is expected to hold 22 alignments.
    self.assertEqual(n, 22)
    # Rewind so that the parser reads back what was just written.
    stream.seek(0)
    alignments = Align.parse(stream, "chain")
    self.check_reading_psl_34_001(alignments)
def test_reading_chain_34_002(self):
    """Check that parsing psl_34_002.chain yields no alignments."""
    # psl_34_002.chain was derived from the PSL file of the same name with:
    # pslToChain psl_34_002.psl psl_34_002.chain
    parsed = Align.parse("Blat/psl_34_002.chain", "chain")
    with self.assertRaises(StopIteration):
        next(parsed)
def test_writing_psl_34_002(self):
    """Write the alignments of psl_34_002.chain and read them back.

    Zero alignments are expected to be written, and parsing the written
    output must again yield no alignments.
    """
    source = Align.parse("Blat/psl_34_002.chain", "chain")
    buffer = StringIO()
    written = Align.write(source, buffer, "chain")
    self.assertEqual(written, 0)
    # Rewind before handing the buffer to the parser.
    buffer.seek(0)
    reparsed = Align.parse(buffer, "chain")
    with self.assertRaises(StopIteration):
        next(reparsed)
def test_reading_psl_34_003(self):
    """Parse psl_34_003.chain and check every alignment it contains."""
    # psl_34_003.chain was derived from the PSL file of the same name with:
    # pslToChain psl_34_003.psl psl_34_003.chain
    self.check_reading_psl_34_003(Align.parse("Blat/psl_34_003.chain", "chain"))
def check_reading_psl_34_003(self, alignments):
    """Check the three alignments expected from psl_34_003.chain.

    ``alignments`` is advanced with ``next`` once per expected alignment
    and must raise StopIteration on the call after the last one.
    """
    # Expected str(counts); only the number of aligned letters differs
    # between the three alignments (every gap-related count is zero).
    counts_text = """\
AlignmentCounts object with
aligned = {aligned}:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
"""
    # Attributes of the counts object that must all be zero, listed in
    # the order in which they are checked.
    zero_attributes = (
        "left_insertions",
        "left_deletions",
        "right_insertions",
        "right_deletions",
        "internal_insertions",
        "internal_deletions",
        "left_gaps",
        "right_gaps",
        "internal_gaps",
        "insertions",
        "deletions",
        "gaps",
    )
    # One entry per expected alignment, in file order:
    # (shape, target id, target length, str(alignment), coordinates,
    #  chain-formatted text, number of aligned letters)
    expectations = [
        (
            (2, 16),
            "chr4",
            191154276,
            """\
chr4 61646095 ???????????????? 61646111
0 |||||||||||||||| 16
hg18_dna 11 ???????????????? 27
""",
            np.array([[61646095, 61646111], [11, 27]]),
            """\
chain 16 chr4 191154276 + 61646095 61646111 hg18_dna 33 + 11 27 1
16
""",
            16,
        ),
        (
            (2, 33),
            "chr1",
            249250621,
            """\
chr1 10271783 ????????????????????????????????? 10271816
0 ||||||||||||||||||||||||||||||||| 33
hg18_dna 0 ????????????????????????????????? 33
""",
            np.array([[10271783, 10271816], [0, 33]]),
            """\
chain 33 chr1 249250621 + 10271783 10271816 hg18_dna 33 + 0 33 2
33
""",
            33,
        ),
        (
            (2, 17),
            "chr2",
            243199373,
            """\
chr2 53575980 ????????????????? 53575997
0 ||||||||||||||||| 17
hg18_dna 25 ????????????????? 8
""",
            np.array([[53575980, 53575997], [25, 8]]),
            """\
chain 17 chr2 243199373 + 53575980 53575997 hg18_dna 33 - 8 25 3
17
""",
            17,
        ),
    ]
    for shape, target_id, target_length, text, coordinates, chain, aligned in expectations:
        alignment = next(alignments)
        self.assertEqual(alignment.shape, shape)
        self.assertEqual(len(alignment), 2)
        self.assertIs(alignment.sequences[0], alignment.target)
        self.assertIs(alignment.sequences[1], alignment.query)
        self.assertEqual(alignment.target.id, target_id)
        self.assertEqual(alignment.query.id, "hg18_dna")
        self.assertEqual(len(alignment.target.seq), target_length)
        self.assertEqual(len(alignment.query.seq), 33)
        self.assertEqual(str(alignment), text)
        self.assertTrue(np.array_equal(alignment.coordinates, coordinates))
        self.assertEqual(format(alignment, "chain"), chain)
        counts = alignment.counts()
        self.assertEqual(
            repr(counts),
            "<AlignmentCounts object (%d aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
            % (aligned, id(counts)),
        )
        self.assertEqual(str(counts), counts_text.format(aligned=aligned))
        for name in zero_attributes:
            self.assertEqual(getattr(counts, name), 0)
        self.assertEqual(counts.aligned, aligned)
    # No further alignments may follow the three checked above.
    with self.assertRaises(StopIteration):
        next(alignments)
def test_writing_psl_34_003(self):
    """Write the alignments of psl_34_003.chain and re-check them.

    Three alignments are expected to be written; the written output is
    parsed again and must pass the same checks as the original file.
    """
    source = Align.parse("Blat/psl_34_003.chain", "chain")
    buffer = StringIO()
    written = Align.write(source, buffer, "chain")
    self.assertEqual(written, 3)
    # Rewind before handing the buffer to the parser.
    buffer.seek(0)
    self.check_reading_psl_34_003(Align.parse(buffer, "chain"))
def test_reading_psl_34_004(self):
    """Parse psl_34_004.chain and check every alignment it contains."""
    # psl_34_004.chain was derived from the PSL file of the same name with:
    # pslToChain psl_34_004.psl psl_34_004.chain
    self.check_reading_psl_34_004(Align.parse("Blat/psl_34_004.chain", "chain"))
def check_reading_psl_34_004(self, alignments):
"""Check parsing psl_34_004.chain."""
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 41))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr9")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 141213431)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr9 85737865 ????????????????????????????????????????? 85737906
0 ||||||||||||||||||||||||||||||||||||||||| 41
hg19_dna 9 ????????????????????????????????????????? 50
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[85737865, 85737906],
[ 9, 50]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 35 chr9 141213431 + 85737865 85737906 hg19_dna 50 + 9 50 1
41
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 41))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr8")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 146364022)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr8 95160479 ????????????????????????????????????????? 95160520
0 ||||||||||||||||||||||||||||||||||||||||| 41
hg19_dna 8 ????????????????????????????????????????? 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[95160479, 95160520],
[ 8, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 41 chr8 146364022 + 95160479 95160520 hg19_dna 50 + 8 49 2
41
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 51304566)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr22 42144400 ???????????????????????????????????? 42144436
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 11 ???????????????????????????????????? 47
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42144400, 42144436],
[ 11, 47]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 30 chr22 51304566 + 42144400 42144436 hg19_dna 50 + 11 47 3
36
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 48))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr2 183925984 ??????----?????????????????????????????????????? 183926028
0 ||||||----|||||||||||||||||||||||||||||||||||||| 48
hg19_dna 1 ???????????????????????????????????????????????? 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[183925984, 183925990, 183925990, 183926028],
[ 1, 7, 11, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 41 chr2 243199373 + 183925984 183926028 hg19_dna 50 + 1 49 4
6 0 4
38
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (44 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 44:
identities = 0,
mismatches = 0.
gaps = 4:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 4:
internal_insertions = 4:
open_internal_insertions = 1,
extend_internal_insertions = 3;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 4)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 4)
self.assertEqual(counts.insertions, 4)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 4)
self.assertEqual(counts.aligned, 44)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 170))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr19 35483340 ????????????????????????????????????????????????????????????
0 |||||||||||||||||||||||||-----------------------------------
hg19_dna 10 ?????????????????????????-----------------------------------
chr19 35483400 ????????????????????????????????????????????????????????????
60 ------------------------------------------------------------
hg19_dna 35 ------------------------------------------------------------
chr19 35483460 ?????????????????????????????????????????????????? 35483510
120 ---------------------------------------||||||||||| 170
hg19_dna 35 ---------------------------------------??????????? 46
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[35483340, 35483365, 35483499, 35483510],
[ 10, 35, 35, 46]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 31 chr19 59128983 + 35483340 35483510 hg19_dna 50 + 10 46 5
25 134 0
11
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 134 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 134:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 134:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 134:
open_internal_deletions = 1,
extend_internal_deletions = 133;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 134)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 134)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 134)
self.assertEqual(counts.gaps, 134)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 78077248)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr18 23891310 ??????????????????????????????????????? 23891349
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 10 ??????????????????????????????????????? 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[23891310, 23891349],
[ 10, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 39 chr18 78077248 + 23891310 23891349 hg19_dna 50 + 10 49 6
39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 28))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 78077248)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr18 43252217 ???????????????????????????? 43252245
0 |||||||||||||||||||||||||||| 28
hg19_dna 21 ???????????????????????????? 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[43252217, 43252245],
[ 21, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 26 chr18 78077248 + 43252217 43252245 hg19_dna 50 + 21 49 7
28
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 54))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr13")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 115169878)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr13 52759147 ?????????????---??????????????????????????????????????
0 |||||||---------||||||||||||||||||||||||||||||||||||||
hg19_dna 1 ???????------?????????????????????????????????????????
chr13 52759198
54
hg19_dna 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[52759147, 52759154, 52759160, 52759160, 52759198],
[ 1, 8, 8, 11, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 41 chr13 115169878 + 52759147 52759198 hg19_dna 50 + 1 49 8
7 6 3
38
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (45 aligned letters; 0 identities; 0 mismatches; 9 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 45:
identities = 0,
mismatches = 0.
gaps = 9:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 9:
internal_insertions = 3:
open_internal_insertions = 1,
extend_internal_insertions = 2;
internal_deletions = 6:
open_internal_deletions = 1,
extend_internal_deletions = 5;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 3)
self.assertEqual(counts.internal_deletions, 6)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 9)
self.assertEqual(counts.insertions, 3)
self.assertEqual(counts.deletions, 6)
self.assertEqual(counts.gaps, 9)
self.assertEqual(counts.aligned, 45)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 50))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr1 1207056 ?????????????????????????????????????????????????? 1207106
0 |||||||||||||||||||||||||||||||||||||||||||||||||| 50
hg19_dna 0 ?????????????????????????????????????????????????? 50
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[1207056, 1207106],
[ 0, 50]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 50 chr1 249250621 + 1207056 1207106 hg19_dna 50 + 0 50 9
50
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (50 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 50:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 50)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 34))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr1 61700837 ?????????????????????????????????? 61700871
0 |||||||||||||||||||||||||||||||||| 34
hg19_dna 1 ?????????????????????????????????? 35
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61700837, 61700871],
[ 1, 35]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 28 chr1 249250621 + 61700837 61700871 hg19_dna 50 + 1 35 10
34
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 44))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 191154276)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr4 37558157 ????????????????----------?????????????????? 37558191
0 ||||||||||----------------|||||||||||||||||| 44
hg19_dna 49 ??????????------???????????????????????????? 11
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[37558157, 37558167, 37558173, 37558173, 37558191],
[ 49, 39, 39, 29, 11]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 26 chr4 191154276 + 37558157 37558191 hg19_dna 50 - 1 39 11
10 6 10
18
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 16 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 16:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 16:
internal_insertions = 10:
open_internal_insertions = 1,
extend_internal_insertions = 9;
internal_deletions = 6:
open_internal_deletions = 1,
extend_internal_deletions = 5;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 10)
self.assertEqual(counts.internal_deletions, 6)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 16)
self.assertEqual(counts.insertions, 10)
self.assertEqual(counts.deletions, 6)
self.assertEqual(counts.gaps, 16)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 37))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 51304566)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr22 48997405 ????????????????????????????????????? 48997442
0 ||||||||||||||||||||||||||||||||||||| 37
hg19_dna 49 ????????????????????????????????????? 12
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48997405, 48997442],
[ 49, 12]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 33 chr22 51304566 + 48997405 48997442 hg19_dna 50 - 1 38 12
37
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (37 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 37:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 37)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr2 120641740 ???????????????????????????????????? 120641776
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 49 ???????????????????????????????????? 13
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[120641740, 120641776],
[ 49, 13]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 34 chr2 243199373 + 120641740 120641776 hg19_dna 50 - 1 37 13
36
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr19 54017130 ??????????????????????????????????????? 54017169
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 49 ??????????????????????????????????????? 10
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[54017130, 54017169],
[ 49, 10]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 39 chr19 59128983 + 54017130 54017169 hg19_dna 50 - 1 40 14
39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr19 553742 ??????????????????????????????????????? 553781
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 49 ??????????????????????????????????????? 10
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[553742, 553781],
[ 49, 10]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 33 chr19 59128983 + 553742 553781 hg19_dna 50 - 1 40 15
39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 135534747)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr10 99388555 ???????????????????????????????????? 99388591
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 49 ???????????????????????????????????? 13
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[99388555, 99388591],
[ 49, 13]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 30 chr10 135534747 + 99388555 99388591 hg19_dna 50 - 1 37 16
36
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 25))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 135534747)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr10 112178171 ????????????????????????? 112178196
0 ||||||||||||||||||||||||| 25
hg19_dna 35 ????????????????????????? 10
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[112178171, 112178196],
[ 35, 10]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 23 chr10 135534747 + 112178171 112178196 hg19_dna 50 - 15 40 17
25
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (25 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 25:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 25)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr1 39368490 ???????????????????????????????????? 39368526
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 49 ???????????????????????????????????? 13
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[39368490, 39368526],
[ 49, 13]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 34 chr1 249250621 + 39368490 39368526 hg19_dna 50 - 1 37 18
36
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 34))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr1 220325687 ?????????????????????????????????? 220325721
0 |||||||||||||||||||||||||||||||||| 34
hg19_dna 47 ?????????????????????????????????? 13
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[220325687, 220325721],
[ 47, 13]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 32 chr1 249250621 + 220325687 220325721 hg19_dna 50 - 3 37 19
34
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
self.assertRaises(StopIteration, next, alignments)
def test_writing_psl_34_004(self):
    """Test writing the alignments in psl_34_004.chain.

    The alignments are parsed from the chain file, written in chain format
    to an in-memory text stream, and then parsed back from that stream so
    that the round-tripped alignments can be verified with
    check_reading_psl_34_004.
    """
    path = "Blat/psl_34_004.chain"
    alignments = Align.parse(path, "chain")
    stream = StringIO()
    n = Align.write(alignments, stream, "chain")
    # The value returned by Align.write is checked against the 19
    # alignments expected for this file.
    self.assertEqual(n, 19)
    # Rewind so that the text just written is parsed from its beginning.
    stream.seek(0)
    alignments = Align.parse(stream, "chain")
    self.check_reading_psl_34_004(alignments)
def test_reading_psl_34_005(self):
    """Test parsing psl_34_005.chain."""
    # Provenance of the input: psl_34_005.chain was created from the
    # corresponding PSL file by running
    #     pslToChain psl_34_005.psl psl_34_005.chain
    self.check_reading_psl_34_005(Align.parse("Blat/psl_34_005.chain", "chain"))
def check_reading_psl_34_005(self, alignments):
"""Check parsing psl_34_005.chain."""
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 16))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertEqual(len(alignment.target.seq), 191154276)
self.assertEqual(len(alignment.query.seq), 33)
self.assertEqual(
str(alignment),
"""\
chr4 61646095 ???????????????? 61646111
0 |||||||||||||||| 16
hg18_dna 11 ???????????????? 27
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61646095, 61646111],
[ 11, 27]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 16 chr4 191154276 + 61646095 61646111 hg18_dna 33 + 11 27 1
16
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (16 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 16:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 16)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 33))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 33)
self.assertEqual(
str(alignment),
"""\
chr1 10271783 ????????????????????????????????? 10271816
0 ||||||||||||||||||||||||||||||||| 33
hg18_dna 0 ????????????????????????????????? 33
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[10271783, 10271816],
[ 0, 33]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 33 chr1 249250621 + 10271783 10271816 hg18_dna 33 + 0 33 2
33
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (33 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 33:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 33)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 17))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 33)
self.assertEqual(
str(alignment),
"""\
chr2 53575980 ????????????????? 53575997
0 ||||||||||||||||| 17
hg18_dna 25 ????????????????? 8
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[53575980, 53575997],
[ 25, 8]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 17 chr2 243199373 + 53575980 53575997 hg18_dna 33 - 8 25 3
17
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (17 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 17:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 17)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 41))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr9")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 141213431)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr9 85737865 ????????????????????????????????????????? 85737906
0 ||||||||||||||||||||||||||||||||||||||||| 41
hg19_dna 9 ????????????????????????????????????????? 50
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[85737865, 85737906],
[ 9, 50]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 35 chr9 141213431 + 85737865 85737906 hg19_dna 50 + 9 50 4
41
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 41))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr8")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 146364022)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr8 95160479 ????????????????????????????????????????? 95160520
0 ||||||||||||||||||||||||||||||||||||||||| 41
hg19_dna 8 ????????????????????????????????????????? 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[95160479, 95160520],
[ 8, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 41 chr8 146364022 + 95160479 95160520 hg19_dna 50 + 8 49 5
41
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 51304566)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr22 42144400 ???????????????????????????????????? 42144436
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 11 ???????????????????????????????????? 47
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42144400, 42144436],
[ 11, 47]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 30 chr22 51304566 + 42144400 42144436 hg19_dna 50 + 11 47 6
36
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 48))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr2 183925984 ??????----?????????????????????????????????????? 183926028
0 ||||||----|||||||||||||||||||||||||||||||||||||| 48
hg19_dna 1 ???????????????????????????????????????????????? 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[183925984, 183925990, 183925990, 183926028],
[ 1, 7, 11, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 41 chr2 243199373 + 183925984 183926028 hg19_dna 50 + 1 49 7
6 0 4
38
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (44 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 44:
identities = 0,
mismatches = 0.
gaps = 4:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 4:
internal_insertions = 4:
open_internal_insertions = 1,
extend_internal_insertions = 3;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 4)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 4)
self.assertEqual(counts.insertions, 4)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 4)
self.assertEqual(counts.aligned, 44)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 170))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr19 35483340 ????????????????????????????????????????????????????????????
0 |||||||||||||||||||||||||-----------------------------------
hg19_dna 10 ?????????????????????????-----------------------------------
chr19 35483400 ????????????????????????????????????????????????????????????
60 ------------------------------------------------------------
hg19_dna 35 ------------------------------------------------------------
chr19 35483460 ?????????????????????????????????????????????????? 35483510
120 ---------------------------------------||||||||||| 170
hg19_dna 35 ---------------------------------------??????????? 46
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[35483340, 35483365, 35483499, 35483510],
[ 10, 35, 35, 46]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 31 chr19 59128983 + 35483340 35483510 hg19_dna 50 + 10 46 8
25 134 0
11
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 134 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 134:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 134:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 134:
open_internal_deletions = 1,
extend_internal_deletions = 133;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 134)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 134)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 134)
self.assertEqual(counts.gaps, 134)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 78077248)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr18 23891310 ??????????????????????????????????????? 23891349
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 10 ??????????????????????????????????????? 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[23891310, 23891349],
[ 10, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 39 chr18 78077248 + 23891310 23891349 hg19_dna 50 + 10 49 9
39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 28))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 78077248)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr18 43252217 ???????????????????????????? 43252245
0 |||||||||||||||||||||||||||| 28
hg19_dna 21 ???????????????????????????? 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[43252217, 43252245],
[ 21, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 26 chr18 78077248 + 43252217 43252245 hg19_dna 50 + 21 49 10
28
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 54))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr13")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 115169878)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr13 52759147 ?????????????---??????????????????????????????????????
0 |||||||---------||||||||||||||||||||||||||||||||||||||
hg19_dna 1 ???????------?????????????????????????????????????????
chr13 52759198
54
hg19_dna 49
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[52759147, 52759154, 52759160, 52759160, 52759198],
[ 1, 8, 8, 11, 49]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 41 chr13 115169878 + 52759147 52759198 hg19_dna 50 + 1 49 11
7 6 3
38
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (45 aligned letters; 0 identities; 0 mismatches; 9 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 45:
identities = 0,
mismatches = 0.
gaps = 9:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 9:
internal_insertions = 3:
open_internal_insertions = 1,
extend_internal_insertions = 2;
internal_deletions = 6:
open_internal_deletions = 1,
extend_internal_deletions = 5;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 3)
self.assertEqual(counts.internal_deletions, 6)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 9)
self.assertEqual(counts.insertions, 3)
self.assertEqual(counts.deletions, 6)
self.assertEqual(counts.gaps, 9)
self.assertEqual(counts.aligned, 45)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 50))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr1 1207056 ?????????????????????????????????????????????????? 1207106
0 |||||||||||||||||||||||||||||||||||||||||||||||||| 50
hg19_dna 0 ?????????????????????????????????????????????????? 50
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[1207056, 1207106],
[ 0, 50]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 50 chr1 249250621 + 1207056 1207106 hg19_dna 50 + 0 50 12
50
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (50 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 50:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 50)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 34))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr1 61700837 ?????????????????????????????????? 61700871
0 |||||||||||||||||||||||||||||||||| 34
hg19_dna 1 ?????????????????????????????????? 35
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61700837, 61700871],
[ 1, 35]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 28 chr1 249250621 + 61700837 61700871 hg19_dna 50 + 1 35 13
34
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 44))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 191154276)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr4 37558157 ????????????????----------?????????????????? 37558191
0 ||||||||||----------------|||||||||||||||||| 44
hg19_dna 49 ??????????------???????????????????????????? 11
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[37558157, 37558167, 37558173, 37558173, 37558191],
[ 49, 39, 39, 29, 11]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 26 chr4 191154276 + 37558157 37558191 hg19_dna 50 - 1 39 14
10 6 10
18
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 16 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 16:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 16:
internal_insertions = 10:
open_internal_insertions = 1,
extend_internal_insertions = 9;
internal_deletions = 6:
open_internal_deletions = 1,
extend_internal_deletions = 5;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 10)
self.assertEqual(counts.internal_deletions, 6)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 16)
self.assertEqual(counts.insertions, 10)
self.assertEqual(counts.deletions, 6)
self.assertEqual(counts.gaps, 16)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 37))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 51304566)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr22 48997405 ????????????????????????????????????? 48997442
0 ||||||||||||||||||||||||||||||||||||| 37
hg19_dna 49 ????????????????????????????????????? 12
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48997405, 48997442],
[ 49, 12]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 33 chr22 51304566 + 48997405 48997442 hg19_dna 50 - 1 38 15
37
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (37 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 37:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 37)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr2 120641740 ???????????????????????????????????? 120641776
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 49 ???????????????????????????????????? 13
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[120641740, 120641776],
[ 49, 13]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 34 chr2 243199373 + 120641740 120641776 hg19_dna 50 - 1 37 16
36
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr19 54017130 ??????????????????????????????????????? 54017169
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 49 ??????????????????????????????????????? 10
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[54017130, 54017169],
[ 49, 10]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 39 chr19 59128983 + 54017130 54017169 hg19_dna 50 - 1 40 17
39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr19 553742 ??????????????????????????????????????? 553781
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 49 ??????????????????????????????????????? 10
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[553742, 553781],
[ 49, 10]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 33 chr19 59128983 + 553742 553781 hg19_dna 50 - 1 40 18
39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 135534747)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr10 99388555 ???????????????????????????????????? 99388591
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 49 ???????????????????????????????????? 13
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[99388555, 99388591],
[ 49, 13]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 30 chr10 135534747 + 99388555 99388591 hg19_dna 50 - 1 37 19
36
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 25))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 135534747)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr10 112178171 ????????????????????????? 112178196
0 ||||||||||||||||||||||||| 25
hg19_dna 35 ????????????????????????? 10
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[112178171, 112178196],
[ 35, 10]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 23 chr10 135534747 + 112178171 112178196 hg19_dna 50 - 15 40 20
25
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (25 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 25:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 25)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr1 39368490 ???????????????????????????????????? 39368526
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 49 ???????????????????????????????????? 13
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[39368490, 39368526],
[ 49, 13]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 34 chr1 249250621 + 39368490 39368526 hg19_dna 50 - 1 37 21
36
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 34))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertEqual(
str(alignment),
"""\
chr1 220325687 ?????????????????????????????????? 220325721
0 |||||||||||||||||||||||||||||||||| 34
hg19_dna 47 ?????????????????????????????????? 13
""",
)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[220325687, 220325721],
[ 47, 13]]),
# fmt: on
)
)
self.assertEqual(
format(alignment, "chain"),
"""\
chain 32 chr1 249250621 + 220325687 220325721 hg19_dna 50 - 3 37 22
34
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
self.assertRaises(StopIteration, next, alignments)
def test_writing_psl_34_005(self):
    """Test writing the alignments in psl_34_005.chain.

    The alignments parsed from the file are written to an in-memory
    stream in chain format; the stream is then parsed again and the
    result is handed to check_reading_psl_34_005 for validation.
    """
    buffer = StringIO()
    parsed = Align.parse("Blat/psl_34_005.chain", "chain")
    written = Align.write(parsed, buffer, "chain")
    # Presumably the return value is the number of alignments written.
    self.assertEqual(written, 22)
    # Rewind so the second parse starts at the beginning of the stream.
    buffer.seek(0)
    self.check_reading_psl_34_005(Align.parse(buffer, "chain"))
class TestAlign_strand(unittest.TestCase):
    """Printed and chain-format output when a sequence is on the reverse strand."""

    def _assert_rendering(self, alignment, expected_text, expected_chain):
        """Check str(alignment) first, then its "chain" formatting."""
        self.assertEqual(str(alignment), expected_text)
        self.assertEqual(format(alignment, "chain"), expected_chain)

    def test_format(self):
        """Test alignment with the target on the opposite strand.

        Four variants of the same alignment are rendered in turn: the
        original coordinates, the coordinates with their columns reversed,
        the reverse complement of the original, and the reverse complement
        with its columns reversed.
        """
        chain_score = 8
        target_row = [0, 2, 2, 3, 4, 6, 6, 9, 10, 12, 15]
        query_row = [11, 11, 9, 8, 8, 6, 5, 2, 2, 0, 0]
        alignment = Alignment(
            ["AACAGCAGCGTGTCG", "CAGCTAGCGAA"],
            np.array([target_row, query_row]),
        )
        alignment.score = chain_score

        # Original coordinates: target ascending, query descending.
        self._assert_rendering(
            alignment,
            """\
target 0 AA--CAGC-AGCGTGTCG 15
0 ----|-||-|||-||--- 18
query 11 --TTC-GCTAGC-TG--- 0
""",
            """\
chain 8 target 15 + 0 15 query 11 - 0 11
0 2 2
1 1 0
2 0 1
3 1 0
0 3 0
2
""",
        )

        # Reverse the column order of the coordinates.
        alignment.coordinates = alignment.coordinates[:, ::-1]
        self._assert_rendering(
            alignment,
            """\
target 15 CGACACGCT-GCTG--TT 0
0 ---||-|||-||-|---- 18
query 0 ---CA-GCTAGC-GAA-- 11
""",
            """\
chain 8 target 15 - 0 15 query 11 + 0 11
2 3 0
3 1 0
2 0 1
1 1 0
0 2 2
0
""",
        )

        # Restore the original column order, then take the reverse
        # complement. The score is assigned again on the returned object
        # (presumably it is not carried over -- confirm against Alignment).
        alignment.coordinates = alignment.coordinates[:, ::-1]
        alignment = alignment.reverse_complement()
        alignment.score = chain_score
        self._assert_rendering(
            alignment,
            """\
target 0 CGACACGCT-GCTG--TT 15
0 ---||-|||-||-|---- 18
query 11 ---CA-GCTAGC-GAA-- 0
""",
            """\
chain 8 target 15 + 0 15 query 11 - 0 11
2 3 0
3 1 0
2 0 1
1 1 0
0 2 2
0
""",
        )

        # Reverse the column order of the reverse-complemented alignment.
        alignment.coordinates = alignment.coordinates[:, ::-1]
        self._assert_rendering(
            alignment,
            """\
target 15 AA--CAGC-AGCGTGTCG 0
0 ----|-||-|||-||--- 18
query 0 --TTC-GCTAGC-TG--- 11
""",
            """\
chain 8 target 15 - 0 15 query 11 + 0 11
0 2 2
1 1 0
2 0 1
3 1 0
0 3 0
2
""",
        )
if __name__ == "__main__":
    # When executed as a script, run this module's tests with a text
    # runner configured for verbosity level 2.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))