Files
biopython/Tests/test_Align_sam.py
mdehoon 66ad0a062e Extend the .counts method of an Alignment (#5011)
* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* halfway finished

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* replace precompiler #defines by inline functions

* update

* update

* add tests

* update

* update

* update

* update

* update

* update

* update

* update

* documentation

* update

* avoid stpcpy

* pointer printing

* compiler warning

* testing without codonalign

* testing without codonalign and pairwisealigner

* compiler warning

* adding codonalign and pairwisealigner back in

* remove inline from check_indices

* add inline to check_indices

* update

* add test line 12287 test_pairwise_aligner.py

* update

* update

* update

* update

* change dtype from int32 to "i"

* all done

* testing

* testing

* testing

* testing

* done

* done

* fix how pointers are printed on Windows

* update

* update

* update

* update

* fix id printing on pypy

* style change only

* Use Py_uintptr_t instead of uintptr_t

* fix memory leak

* remove double semicolon

* check if GitHub actions are now picking up Python version 3.13.5 without hardcoding it

---------

Co-authored-by: Michiel Jan Laurens de Hoon <mdehoon@Michiels-MacBook-Air.local>
Co-authored-by: Michiel de Hoon <mdehoon@tkx288.genome.gsc.riken.jp>
Co-authored-by: Michiel de Hoon <michiel.dehoon@riken.jp>
2025-07-11 14:40:07 +09:00

9632 lines
383 KiB
Python

# Copyright 2022 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for Align.sam module."""
import unittest
from io import StringIO
from Bio import Align
from Bio import SeqIO
from Bio.Align import Alignment
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
try:
import numpy as np
except ImportError:
from Bio import MissingPythonDependencyError
raise MissingPythonDependencyError(
"Install numpy if you want to use Bio.Align.sam."
) from None
class TestAlign_dna_rna(unittest.TestCase):
# The SAM file dna_rna.sam was generated using these commands:
# twoBitToFa hg38.2bit stdout | samtools dict -a hg38 -s "Homo sapiens" | grep -v chrUn | grep -v alt | grep -v random > dna_rna.sam
# psl2sam.pl dna_rna.psl >> dna_rna.sam
# The CIGAR string was then edited to replace D by N for introns and H by S
# where appropriate.
# The alignment scores (AS tag) were copied from the BED file dna_rna.bed.
def setUp(self):
    """Read the DNA and RNA FASTA files under Blat/ into self.dna and self.rna.

    self.dna is a Seq built from a dictionary that maps each start
    coordinate to the upper-cased sequence of the corresponding record,
    with a total length of 198295559.  self.rna maps each RNA record id to
    its sequence.
    """
    fragments = {}
    for dna_record in SeqIO.parse("Blat/dna.fa", "fasta"):
        # Record ids are expected to look like "chr3:<start>-<end>".
        chromosome, span = dna_record.id.split(":")
        assert chromosome == "chr3"
        begin_text, stop_text = span.split("-")
        begin, stop = int(begin_text), int(stop_text)
        letters = str(dna_record.seq).upper()
        # The coordinates in the record id must agree with the sequence length.
        assert len(letters) == stop - begin
        fragments[begin] = letters
    self.dna = Seq(fragments, length=198295559)
    self.rna = {}
    for rna_record in SeqIO.parse("Blat/rna.fa", "fasta"):
        self.rna[rna_record.id] = rna_record.seq
    # Last 12 nucleotides were clipped by Blat as the poly(A) tail
    for transcript_id in ("NR_111921.1", "NR_111921.1_modified"):
        self.rna[transcript_id] = self.rna[transcript_id][:-12]
def check_alignments(self, alignments):
"""Check the alignments."""
self.assertEqual(list(alignments.metadata), ["HD"])
self.assertEqual(alignments.metadata["HD"], {"VN": "1.0", "SO": "unsorted"})
self.assertEqual(len(alignments.targets), 25)
self.assertEqual(alignments.targets[0].id, "chr1")
self.assertEqual(len(alignments.targets[0]), 248956422)
self.assertEqual(
alignments.targets[0].annotations,
{
"MD5": "2648ae1bacce4ec4b6cf337dcae37816",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[1].id, "chr10")
self.assertEqual(len(alignments.targets[1]), 133797422)
self.assertEqual(
alignments.targets[1].annotations,
{
"MD5": "907112d17fcb73bcab1ed1c72b97ce68",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[2].id, "chr11")
self.assertEqual(len(alignments.targets[2]), 135086622)
self.assertEqual(
alignments.targets[2].annotations,
{
"MD5": "1511375dc2dd1b633af8cf439ae90cec",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[3].id, "chr12")
self.assertEqual(len(alignments.targets[3]), 133275309)
self.assertEqual(
alignments.targets[3].annotations,
{
"MD5": "e81e16d3f44337034695a29b97708fce",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[4].id, "chr13")
self.assertEqual(len(alignments.targets[4]), 114364328)
self.assertEqual(
alignments.targets[4].annotations,
{
"MD5": "17dab79b963ccd8e7377cef59a54fe1c",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[5].id, "chr14")
self.assertEqual(len(alignments.targets[5]), 107043718)
self.assertEqual(
alignments.targets[5].annotations,
{
"MD5": "acbd9552c059d9b403e75ed26c1ce5bc",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[6].id, "chr15")
self.assertEqual(len(alignments.targets[6]), 101991189)
self.assertEqual(
alignments.targets[6].annotations,
{
"MD5": "f036bd11158407596ca6bf3581454706",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[7].id, "chr16")
self.assertEqual(len(alignments.targets[7]), 90338345)
self.assertEqual(
alignments.targets[7].annotations,
{
"MD5": "24e7cabfba3548a2bb4dff582b9ee870",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[8].id, "chr17")
self.assertEqual(len(alignments.targets[8]), 83257441)
self.assertEqual(
alignments.targets[8].annotations,
{
"MD5": "a8499ca51d6fb77332c2d242923994eb",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[9].id, "chr18")
self.assertEqual(len(alignments.targets[9]), 80373285)
self.assertEqual(
alignments.targets[9].annotations,
{
"MD5": "11eeaa801f6b0e2e36a1138616b8ee9a",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[10].id, "chr19")
self.assertEqual(len(alignments.targets[10]), 58617616)
self.assertEqual(
alignments.targets[10].annotations,
{
"MD5": "b0eba2c7bb5c953d1e06a508b5e487de",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[11].id, "chr2")
self.assertEqual(len(alignments.targets[11]), 242193529)
self.assertEqual(
alignments.targets[11].annotations,
{
"MD5": "4bb4f82880a14111eb7327169ffb729b",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[12].id, "chr20")
self.assertEqual(len(alignments.targets[12]), 64444167)
self.assertEqual(
alignments.targets[12].annotations,
{
"MD5": "b18e6c531b0bd70e949a7fc20859cb01",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[13].id, "chr21")
self.assertEqual(len(alignments.targets[13]), 46709983)
self.assertEqual(
alignments.targets[13].annotations,
{
"MD5": "2f45a3455007b7e271509161e52954a9",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[14].id, "chr22")
self.assertEqual(len(alignments.targets[14]), 50818468)
self.assertEqual(
alignments.targets[14].annotations,
{
"MD5": "221733a2a15e2de66d33e73d126c5109",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[15].id, "chr3")
self.assertEqual(len(alignments.targets[15]), 198295559)
self.assertEqual(
alignments.targets[15].annotations,
{
"MD5": "a48af509898d3736ba95dc0912c0b461",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[16].id, "chr4")
self.assertEqual(len(alignments.targets[16]), 190214555)
self.assertEqual(
alignments.targets[16].annotations,
{
"MD5": "3210fecf1eb92d5489da4346b3fddc6e",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[17].id, "chr5")
self.assertEqual(len(alignments.targets[17]), 181538259)
self.assertEqual(
alignments.targets[17].annotations,
{
"MD5": "f7f05fb7ceea78cbc32ce652c540ff2d",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[18].id, "chr6")
self.assertEqual(len(alignments.targets[18]), 170805979)
self.assertEqual(
alignments.targets[18].annotations,
{
"MD5": "6a48dfa97e854e3c6f186c8ff973f7dd",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[19].id, "chr7")
self.assertEqual(len(alignments.targets[19]), 159345973)
self.assertEqual(
alignments.targets[19].annotations,
{
"MD5": "94eef2b96fd5a7c8db162c8c74378039",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[20].id, "chr8")
self.assertEqual(len(alignments.targets[20]), 145138636)
self.assertEqual(
alignments.targets[20].annotations,
{
"MD5": "c67955b5f7815a9a1edfaa15893d3616",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[21].id, "chr9")
self.assertEqual(len(alignments.targets[21]), 138394717)
self.assertEqual(
alignments.targets[21].annotations,
{
"MD5": "addd2795560986b7491c40b1faa3978a",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[22].id, "chrM")
self.assertEqual(len(alignments.targets[22]), 16569)
self.assertEqual(
alignments.targets[22].annotations,
{
"MD5": "c68f52674c9fb33aef52dcf399755519",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[23].id, "chrX")
self.assertEqual(len(alignments.targets[23]), 156040895)
self.assertEqual(
alignments.targets[23].annotations,
{
"MD5": "49527016a48497d9d1cbd8e4a9049bd3",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
self.assertEqual(alignments.targets[24].id, "chrY")
self.assertEqual(len(alignments.targets[24]), 57227415)
self.assertEqual(
alignments.targets[24].annotations,
{
"MD5": "b2b7e6369564d89059e763cd6e736837",
"assembly": "hg38",
"species": "Homo sapiens",
},
)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 1711))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "NR_046654.1")
self.assertEqual(len(alignment.target.seq), len(self.dna))
self.assertEqual(
alignment.target.seq.defined_ranges,
((42530895, 42530958), (42532020, 42532095), (42532563, 42532606)),
)
for start, end in alignment.target.seq.defined_ranges:
self.assertEqual(alignment.target.seq[start:end], self.dna[start:end])
self.assertEqual(alignment.query.seq, self.rna[alignment.query.id])
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42530895, 42530958, 42532020, 42532095, 42532563, 42532606],
[ 181, 118, 118, 43, 43, 0]])
# fmt: on
)
)
self.assertTrue(
np.array_equal(
alignment.substitutions,
# fmt: off
np.array([[38., 0., 0., 0.],
[ 0., 41., 0., 0.],
[ 0., 0., 60., 0.],
[ 0., 0., 0., 42.],
])
# fmt: on
)
)
self.assertEqual(alignment.substitutions.alphabet, "ACGT")
self.assertEqual(alignment.mapq, 0)
matches = sum(
alignment.substitutions[c, c] for c in alignment.substitutions.alphabet
)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.annotations["NM"], 0)
self.assertNotIn("hard_clip_left", alignment.query.annotations)
self.assertNotIn("hard_clip_right", alignment.query.annotations)
self.assertEqual(alignment.operations, bytearray(b"MNMNM"))
self.assertEqual(
str(alignment),
"""\
chr3 42530895 CGGAAGTACTTCTGGGGGTACATACTCATCGGCTGGGGTATGGTACCAGGGAGGGCTTCC
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NR_046654 181 CGGAAGTACTTCTGGGGGTACATACTCATCGGCTGGGGTATGGTACCAGGGAGGGCTTCC
chr3 42530955 AGG?????????????????????????????????????????????????????????
60 |||---------------------------------------------------------
NR_046654 121 AGG---------------------------------------------------------
chr3 42531015 ????????????????????????????????????????????????????????????
120 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531075 ????????????????????????????????????????????????????????????
180 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531135 ????????????????????????????????????????????????????????????
240 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531195 ????????????????????????????????????????????????????????????
300 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531255 ????????????????????????????????????????????????????????????
360 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531315 ????????????????????????????????????????????????????????????
420 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531375 ????????????????????????????????????????????????????????????
480 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531435 ????????????????????????????????????????????????????????????
540 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531495 ????????????????????????????????????????????????????????????
600 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531555 ????????????????????????????????????????????????????????????
660 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531615 ????????????????????????????????????????????????????????????
720 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531675 ????????????????????????????????????????????????????????????
780 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531735 ????????????????????????????????????????????????????????????
840 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531795 ????????????????????????????????????????????????????????????
900 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531855 ????????????????????????????????????????????????????????????
960 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531915 ????????????????????????????????????????????????????????????
1020 ------------------------------------------------------------
NR_046654 118 ------------------------------------------------------------
chr3 42531975 ?????????????????????????????????????????????CAGTTCTTCCTTGAG
1080 ---------------------------------------------|||||||||||||||
NR_046654 118 ---------------------------------------------CAGTTCTTCCTTGAG
chr3 42532035 CGTAAGCGGATTGGGAGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGATGAC
1140 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NR_046654 103 CGTAAGCGGATTGGGAGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGATGAC
chr3 42532095 ????????????????????????????????????????????????????????????
1200 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532155 ????????????????????????????????????????????????????????????
1260 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532215 ????????????????????????????????????????????????????????????
1320 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532275 ????????????????????????????????????????????????????????????
1380 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532335 ????????????????????????????????????????????????????????????
1440 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532395 ????????????????????????????????????????????????????????????
1500 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532455 ????????????????????????????????????????????????????????????
1560 ------------------------------------------------------------
NR_046654 43 ------------------------------------------------------------
chr3 42532515 ????????????????????????????????????????????????CTAGCATCCTTC
1620 ------------------------------------------------||||||||||||
NR_046654 43 ------------------------------------------------CTAGCATCCTTC
chr3 42532575 CCAGGTATGCATCTGCTGCCAAGCCAGGGAG 42532606
1680 ||||||||||||||||||||||||||||||| 1711
NR_046654 31 CCAGGTATGCATCTGCTGCCAAGCCAGGGAG 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
NR_046654.1 16 chr3 42530896 0 63M1062N75M468N43M * 0 0 CGGAAGTACTTCTGGGGGTACATACTCATCGGCTGGGGTATGGTACCAGGGAGGGCTTCCAGGCAGTTCTTCCTTGAGCGTAAGCGGATTGGGAGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGATGACCTAGCATCCTTCCCAGGTATGCATCTGCTGCCAAGCCAGGGAG * AS:i:1000 NM:i:0
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (181 aligned letters; 181 identities; 0 mismatches; 1530 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 181:
identities = 181,
mismatches = 0.
gaps = 1530:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 1530:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 1530:
open_internal_deletions = 2,
extend_internal_deletions = 1528;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 1530)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 1530)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 1530)
self.assertEqual(counts.gaps, 1530)
self.assertEqual(counts.aligned, 181)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 1714))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "NR_046654.1_modified")
self.assertEqual(len(alignment.target.seq), len(self.dna))
self.assertEqual(
alignment.target.seq.defined_ranges,
((42530895, 42530958), (42532020, 42532095), (42532563, 42532606)),
)
for start, end in alignment.target.seq.defined_ranges:
self.assertEqual(alignment.target.seq[start:end], self.dna[start:end])
self.assertEqual(alignment.query.seq, self.rna[alignment.query.id])
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42530895, 42530922, 42530922, 42530958, 42532020,
42532037, 42532039, 42532095, 42532563, 42532606],
[ 185, 158, 155, 119, 119,
102, 102, 46, 46, 3],
])
# fmt: on
)
)
self.assertTrue(
np.array_equal(
alignment.substitutions,
# fmt: off
np.array([[36., 0., 0., 1.],
[ 0., 41., 0., 0.],
[ 0., 0., 60., 0.],
[ 0., 0., 0., 41.],
]),
# fmt: on
)
)
self.assertEqual(alignment.substitutions.alphabet, "ACGT")
self.assertEqual(alignment.mapq, 0)
self.assertEqual(alignment.score, 978)
self.assertEqual(alignment.annotations["NM"], 6)
self.assertNotIn("hard_clip_left", alignment.query.annotations)
self.assertNotIn("hard_clip_right", alignment.query.annotations)
self.assertEqual(alignment.operations, bytearray(b"MIMNMDMNM"))
self.assertEqual(
str(alignment),
"""\
chr3 42530895 CGGAAGTACTTCTGGGGGTACATACTC---ATCGGCTGGGGTATGGTACCAGGGAGGGCT
0 |||||||||||||||||||||||||||---||||||||||||||||||||||||||||||
NR_046654 185 CGGAAGTACTTCTGGGGGTACATACTCCCCATCGGCTGGGGTATGGTACCAGGGAGGGCT
chr3 42530952 TCCAGG??????????????????????????????????????????????????????
60 ||||||------------------------------------------------------
NR_046654 125 TCCAGG------------------------------------------------------
chr3 42531012 ????????????????????????????????????????????????????????????
120 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531072 ????????????????????????????????????????????????????????????
180 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531132 ????????????????????????????????????????????????????????????
240 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531192 ????????????????????????????????????????????????????????????
300 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531252 ????????????????????????????????????????????????????????????
360 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531312 ????????????????????????????????????????????????????????????
420 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531372 ????????????????????????????????????????????????????????????
480 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531432 ????????????????????????????????????????????????????????????
540 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531492 ????????????????????????????????????????????????????????????
600 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531552 ????????????????????????????????????????????????????????????
660 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531612 ????????????????????????????????????????????????????????????
720 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531672 ????????????????????????????????????????????????????????????
780 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531732 ????????????????????????????????????????????????????????????
840 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531792 ????????????????????????????????????????????????????????????
900 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531852 ????????????????????????????????????????????????????????????
960 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531912 ????????????????????????????????????????????????????????????
1020 ------------------------------------------------------------
NR_046654 119 ------------------------------------------------------------
chr3 42531972 ????????????????????????????????????????????????CAGTTCTTCCTT
1080 ------------------------------------------------||||||||||||
NR_046654 119 ------------------------------------------------CAGTTCTTCCTT
chr3 42532032 GAGCGTAAGCGGATTGGGAGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGAT
1140 |||||--|||||||||||.|||||||||||||||||||||||||||||||||||||||||
NR_046654 107 GAGCG--AGCGGATTGGGTGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGAT
chr3 42532092 GAC?????????????????????????????????????????????????????????
1200 |||---------------------------------------------------------
NR_046654 49 GAC---------------------------------------------------------
chr3 42532152 ????????????????????????????????????????????????????????????
1260 ------------------------------------------------------------
NR_046654 46 ------------------------------------------------------------
chr3 42532212 ????????????????????????????????????????????????????????????
1320 ------------------------------------------------------------
NR_046654 46 ------------------------------------------------------------
chr3 42532272 ????????????????????????????????????????????????????????????
1380 ------------------------------------------------------------
NR_046654 46 ------------------------------------------------------------
chr3 42532332 ????????????????????????????????????????????????????????????
1440 ------------------------------------------------------------
NR_046654 46 ------------------------------------------------------------
chr3 42532392 ????????????????????????????????????????????????????????????
1500 ------------------------------------------------------------
NR_046654 46 ------------------------------------------------------------
chr3 42532452 ????????????????????????????????????????????????????????????
1560 ------------------------------------------------------------
NR_046654 46 ------------------------------------------------------------
chr3 42532512 ???????????????????????????????????????????????????CTAGCATCC
1620 ---------------------------------------------------|||||||||
NR_046654 46 ---------------------------------------------------CTAGCATCC
chr3 42532572 TTCCCAGGTATGCATCTGCTGCCAAGCCAGGGAG 42532606
1680 |||||||||||||||||||||||||||||||||| 1714
NR_046654 37 TTCCCAGGTATGCATCTGCTGCCAAGCCAGGGAG 3
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
NR_046654.1_modified 16 chr3 42530896 0 5S27M3I36M1062N17M2D56M468N43M3S * 0 0 AAAAACGGAAGTACTTCTGGGGGTACATACTCCCCATCGGCTGGGGTATGGTACCAGGGAGGGCTTCCAGGCAGTTCTTCCTTGAGCGAGCGGATTGGGTGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGATGACCTAGCATCCTTCCCAGGTATGCATCTGCTGCCAAGCCAGGGAGAAA * AS:i:978 NM:i:6
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (179 aligned letters; 178 identities; 1 mismatches; 1535 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 179:
identities = 178,
mismatches = 1.
gaps = 1535:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 1535:
internal_insertions = 3:
open_internal_insertions = 1,
extend_internal_insertions = 2;
internal_deletions = 1532:
open_internal_deletions = 3,
extend_internal_deletions = 1529;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 3)
self.assertEqual(counts.internal_deletions, 1532)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 1535)
self.assertEqual(counts.insertions, 3)
self.assertEqual(counts.deletions, 1532)
self.assertEqual(counts.gaps, 1535)
self.assertEqual(counts.aligned, 179)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 5407))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "NR_111921.1")
self.assertEqual(len(alignment.target.seq), len(self.dna))
self.assertEqual(
alignment.target.seq.defined_ranges,
((48663767, 48663813), (48665640, 48665722), (48669098, 48669174)),
)
for start, end in alignment.target.seq.defined_ranges:
self.assertEqual(alignment.target.seq[start:end], self.dna[start:end])
self.assertEqual(alignment.query.seq, self.rna[alignment.query.id])
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array( [[48663767, 48663813, 48665640, 48665722, 48669098, 48669174],
[ 0, 46, 46, 128, 128, 204]]),
# fmt: on
)
)
self.assertTrue(
np.array_equal(
alignment.substitutions,
# fmt: off
np.array([[62., 0., 0., 0.],
[ 0., 42., 0., 0.],
[ 0., 0., 66., 0.],
[ 0., 0., 0., 34.],
])
# fmt: on
)
)
self.assertEqual(alignment.substitutions.alphabet, "ACGT")
self.assertEqual(alignment.mapq, 0)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.annotations["NM"], 0)
self.assertNotIn("hard_clip_left", alignment.query.annotations)
self.assertEqual(alignment.query.annotations["hard_clip_right"], 12)
self.assertEqual(alignment.operations, bytearray(b"MNMNM"))
self.assertEqual(
str(alignment),
"""\
chr3 48663767 CACGAGAGGAGCGGAGGCGAGGGGTGAACGCGGAGCACTCCAATCG??????????????
0 ||||||||||||||||||||||||||||||||||||||||||||||--------------
NR_111921 0 CACGAGAGGAGCGGAGGCGAGGGGTGAACGCGGAGCACTCCAATCG--------------
chr3 48663827 ????????????????????????????????????????????????????????????
60 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48663887 ????????????????????????????????????????????????????????????
120 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48663947 ????????????????????????????????????????????????????????????
180 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664007 ????????????????????????????????????????????????????????????
240 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664067 ????????????????????????????????????????????????????????????
300 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664127 ????????????????????????????????????????????????????????????
360 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664187 ????????????????????????????????????????????????????????????
420 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664247 ????????????????????????????????????????????????????????????
480 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664307 ????????????????????????????????????????????????????????????
540 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664367 ????????????????????????????????????????????????????????????
600 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664427 ????????????????????????????????????????????????????????????
660 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664487 ????????????????????????????????????????????????????????????
720 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664547 ????????????????????????????????????????????????????????????
780 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664607 ????????????????????????????????????????????????????????????
840 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664667 ????????????????????????????????????????????????????????????
900 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664727 ????????????????????????????????????????????????????????????
960 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664787 ????????????????????????????????????????????????????????????
1020 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664847 ????????????????????????????????????????????????????????????
1080 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664907 ????????????????????????????????????????????????????????????
1140 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48664967 ????????????????????????????????????????????????????????????
1200 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665027 ????????????????????????????????????????????????????????????
1260 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665087 ????????????????????????????????????????????????????????????
1320 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665147 ????????????????????????????????????????????????????????????
1380 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665207 ????????????????????????????????????????????????????????????
1440 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665267 ????????????????????????????????????????????????????????????
1500 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665327 ????????????????????????????????????????????????????????????
1560 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665387 ????????????????????????????????????????????????????????????
1620 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665447 ????????????????????????????????????????????????????????????
1680 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665507 ????????????????????????????????????????????????????????????
1740 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665567 ????????????????????????????????????????????????????????????
1800 ------------------------------------------------------------
NR_111921 46 ------------------------------------------------------------
chr3 48665627 ?????????????CTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGC
1860 -------------|||||||||||||||||||||||||||||||||||||||||||||||
NR_111921 46 -------------CTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGC
chr3 48665687 TGCTGGGCGGCAGATGGAGCGATCAGAAGACCAGG?????????????????????????
1920 |||||||||||||||||||||||||||||||||||-------------------------
NR_111921 93 TGCTGGGCGGCAGATGGAGCGATCAGAAGACCAGG-------------------------
chr3 48665747 ????????????????????????????????????????????????????????????
1980 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48665807 ????????????????????????????????????????????????????????????
2040 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48665867 ????????????????????????????????????????????????????????????
2100 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48665927 ????????????????????????????????????????????????????????????
2160 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48665987 ????????????????????????????????????????????????????????????
2220 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666047 ????????????????????????????????????????????????????????????
2280 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666107 ????????????????????????????????????????????????????????????
2340 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666167 ????????????????????????????????????????????????????????????
2400 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666227 ????????????????????????????????????????????????????????????
2460 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666287 ????????????????????????????????????????????????????????????
2520 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666347 ????????????????????????????????????????????????????????????
2580 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666407 ????????????????????????????????????????????????????????????
2640 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666467 ????????????????????????????????????????????????????????????
2700 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666527 ????????????????????????????????????????????????????????????
2760 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666587 ????????????????????????????????????????????????????????????
2820 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666647 ????????????????????????????????????????????????????????????
2880 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666707 ????????????????????????????????????????????????????????????
2940 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666767 ????????????????????????????????????????????????????????????
3000 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666827 ????????????????????????????????????????????????????????????
3060 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666887 ????????????????????????????????????????????????????????????
3120 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48666947 ????????????????????????????????????????????????????????????
3180 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667007 ????????????????????????????????????????????????????????????
3240 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667067 ????????????????????????????????????????????????????????????
3300 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667127 ????????????????????????????????????????????????????????????
3360 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667187 ????????????????????????????????????????????????????????????
3420 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667247 ????????????????????????????????????????????????????????????
3480 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667307 ????????????????????????????????????????????????????????????
3540 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667367 ????????????????????????????????????????????????????????????
3600 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667427 ????????????????????????????????????????????????????????????
3660 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667487 ????????????????????????????????????????????????????????????
3720 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667547 ????????????????????????????????????????????????????????????
3780 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667607 ????????????????????????????????????????????????????????????
3840 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667667 ????????????????????????????????????????????????????????????
3900 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667727 ????????????????????????????????????????????????????????????
3960 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667787 ????????????????????????????????????????????????????????????
4020 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667847 ????????????????????????????????????????????????????????????
4080 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667907 ????????????????????????????????????????????????????????????
4140 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48667967 ????????????????????????????????????????????????????????????
4200 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668027 ????????????????????????????????????????????????????????????
4260 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668087 ????????????????????????????????????????????????????????????
4320 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668147 ????????????????????????????????????????????????????????????
4380 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668207 ????????????????????????????????????????????????????????????
4440 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668267 ????????????????????????????????????????????????????????????
4500 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668327 ????????????????????????????????????????????????????????????
4560 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668387 ????????????????????????????????????????????????????????????
4620 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668447 ????????????????????????????????????????????????????????????
4680 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668507 ????????????????????????????????????????????????????????????
4740 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668567 ????????????????????????????????????????????????????????????
4800 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668627 ????????????????????????????????????????????????????????????
4860 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668687 ????????????????????????????????????????????????????????????
4920 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668747 ????????????????????????????????????????????????????????????
4980 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668807 ????????????????????????????????????????????????????????????
5040 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668867 ????????????????????????????????????????????????????????????
5100 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668927 ????????????????????????????????????????????????????????????
5160 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48668987 ????????????????????????????????????????????????????????????
5220 ------------------------------------------------------------
NR_111921 128 ------------------------------------------------------------
chr3 48669047 ???????????????????????????????????????????????????AGACGGGAG
5280 ---------------------------------------------------|||||||||
NR_111921 128 ---------------------------------------------------AGACGGGAG
chr3 48669107 CTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTATAT
5340 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NR_111921 137 CTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTATAT
chr3 48669167 TAAAAAA 48669174
5400 ||||||| 5407
NR_111921 197 TAAAAAA 204
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
NR_111921.1 0 chr3 48663768 0 46M1827N82M3376N76M12H * 0 0 CACGAGAGGAGCGGAGGCGAGGGGTGAACGCGGAGCACTCCAATCGCTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGCTGCTGGGCGGCAGATGGAGCGATCAGAAGACCAGGAGACGGGAGCTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTATATTAAAAAA * AS:i:1000 NM:i:0
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (204 aligned letters; 204 identities; 0 mismatches; 5203 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 204:
identities = 204,
mismatches = 0.
gaps = 5203:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 5203:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 5203:
open_internal_deletions = 2,
extend_internal_deletions = 5201;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 5203)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 5203)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 5203)
self.assertEqual(counts.gaps, 5203)
self.assertEqual(counts.aligned, 204)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 5409))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "NR_111921.1_modified")
self.assertEqual(len(alignment.target.seq), len(self.dna))
self.assertEqual(
alignment.target.seq.defined_ranges,
((48663767, 48663813), (48665640, 48665722), (48669098, 48669174)),
)
for start, end in alignment.target.seq.defined_ranges:
self.assertEqual(alignment.target.seq[start:end], self.dna[start:end])
self.assertEqual(alignment.query.seq, self.rna[alignment.query.id])
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48663767, 48663795, 48663796, 48663813, 48665640,
48665716, 48665716, 48665722, 48669098, 48669174],
[ 3, 31, 31, 48, 48,
124, 126, 132, 132, 208],
])
# fmt: on
)
)
self.assertTrue(
np.array_equal(
alignment.substitutions,
# fmt: off
np.array([[62., 0., 0., 0.],
[ 0., 41., 0., 0.],
[ 0., 2., 64., 0.],
[ 0., 0., 0., 34.],
]),
# fmt: on
)
)
self.assertEqual(alignment.substitutions.alphabet, "ACGT")
self.assertEqual(alignment.mapq, 0)
self.assertEqual(alignment.score, 972)
self.assertEqual(alignment.annotations["NM"], 5)
self.assertNotIn("hard_clip_left", alignment.query.annotations)
self.assertEqual(alignment.query.annotations["hard_clip_right"], 12)
self.assertEqual(alignment.operations, bytearray(b"MDMNMIMNM"))
self.assertEqual(
str(alignment),
"""\
chr3 48663767 CACGAGAGGAGCGGAGGCGAGGGGTGAACGCGGAGCACTCCAATCG??????????????
0 ||||||||||||||||||||||||||||-|||||||||||||||||--------------
NR_111921 3 CACGAGAGGAGCGGAGGCGAGGGGTGAA-GCGGAGCACTCCAATCG--------------
chr3 48663827 ????????????????????????????????????????????????????????????
60 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48663887 ????????????????????????????????????????????????????????????
120 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48663947 ????????????????????????????????????????????????????????????
180 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664007 ????????????????????????????????????????????????????????????
240 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664067 ????????????????????????????????????????????????????????????
300 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664127 ????????????????????????????????????????????????????????????
360 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664187 ????????????????????????????????????????????????????????????
420 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664247 ????????????????????????????????????????????????????????????
480 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664307 ????????????????????????????????????????????????????????????
540 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664367 ????????????????????????????????????????????????????????????
600 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664427 ????????????????????????????????????????????????????????????
660 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664487 ????????????????????????????????????????????????????????????
720 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664547 ????????????????????????????????????????????????????????????
780 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664607 ????????????????????????????????????????????????????????????
840 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664667 ????????????????????????????????????????????????????????????
900 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664727 ????????????????????????????????????????????????????????????
960 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664787 ????????????????????????????????????????????????????????????
1020 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664847 ????????????????????????????????????????????????????????????
1080 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664907 ????????????????????????????????????????????????????????????
1140 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48664967 ????????????????????????????????????????????????????????????
1200 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665027 ????????????????????????????????????????????????????????????
1260 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665087 ????????????????????????????????????????????????????????????
1320 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665147 ????????????????????????????????????????????????????????????
1380 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665207 ????????????????????????????????????????????????????????????
1440 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665267 ????????????????????????????????????????????????????????????
1500 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665327 ????????????????????????????????????????????????????????????
1560 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665387 ????????????????????????????????????????????????????????????
1620 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665447 ????????????????????????????????????????????????????????????
1680 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665507 ????????????????????????????????????????????????????????????
1740 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665567 ????????????????????????????????????????????????????????????
1800 ------------------------------------------------------------
NR_111921 48 ------------------------------------------------------------
chr3 48665627 ?????????????CTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGC
1860 -------------|||||||||||||||||||||||||||||||||||||||||||||||
NR_111921 48 -------------CTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGC
chr3 48665687 TGCTGGGCGGCAGATGGAGCGATCAGAAG--ACCAGG???????????????????????
1920 |||||..||||||||||||||||||||||--||||||-----------------------
NR_111921 95 TGCTGCCCGGCAGATGGAGCGATCAGAAGCCACCAGG-----------------------
chr3 48665745 ????????????????????????????????????????????????????????????
1980 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48665805 ????????????????????????????????????????????????????????????
2040 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48665865 ????????????????????????????????????????????????????????????
2100 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48665925 ????????????????????????????????????????????????????????????
2160 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48665985 ????????????????????????????????????????????????????????????
2220 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666045 ????????????????????????????????????????????????????????????
2280 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666105 ????????????????????????????????????????????????????????????
2340 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666165 ????????????????????????????????????????????????????????????
2400 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666225 ????????????????????????????????????????????????????????????
2460 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666285 ????????????????????????????????????????????????????????????
2520 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666345 ????????????????????????????????????????????????????????????
2580 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666405 ????????????????????????????????????????????????????????????
2640 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666465 ????????????????????????????????????????????????????????????
2700 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666525 ????????????????????????????????????????????????????????????
2760 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666585 ????????????????????????????????????????????????????????????
2820 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666645 ????????????????????????????????????????????????????????????
2880 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666705 ????????????????????????????????????????????????????????????
2940 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666765 ????????????????????????????????????????????????????????????
3000 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666825 ????????????????????????????????????????????????????????????
3060 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666885 ????????????????????????????????????????????????????????????
3120 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48666945 ????????????????????????????????????????????????????????????
3180 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667005 ????????????????????????????????????????????????????????????
3240 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667065 ????????????????????????????????????????????????????????????
3300 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667125 ????????????????????????????????????????????????????????????
3360 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667185 ????????????????????????????????????????????????????????????
3420 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667245 ????????????????????????????????????????????????????????????
3480 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667305 ????????????????????????????????????????????????????????????
3540 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667365 ????????????????????????????????????????????????????????????
3600 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667425 ????????????????????????????????????????????????????????????
3660 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667485 ????????????????????????????????????????????????????????????
3720 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667545 ????????????????????????????????????????????????????????????
3780 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667605 ????????????????????????????????????????????????????????????
3840 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667665 ????????????????????????????????????????????????????????????
3900 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667725 ????????????????????????????????????????????????????????????
3960 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667785 ????????????????????????????????????????????????????????????
4020 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667845 ????????????????????????????????????????????????????????????
4080 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667905 ????????????????????????????????????????????????????????????
4140 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48667965 ????????????????????????????????????????????????????????????
4200 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668025 ????????????????????????????????????????????????????????????
4260 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668085 ????????????????????????????????????????????????????????????
4320 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668145 ????????????????????????????????????????????????????????????
4380 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668205 ????????????????????????????????????????????????????????????
4440 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668265 ????????????????????????????????????????????????????????????
4500 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668325 ????????????????????????????????????????????????????????????
4560 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668385 ????????????????????????????????????????????????????????????
4620 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668445 ????????????????????????????????????????????????????????????
4680 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668505 ????????????????????????????????????????????????????????????
4740 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668565 ????????????????????????????????????????????????????????????
4800 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668625 ????????????????????????????????????????????????????????????
4860 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668685 ????????????????????????????????????????????????????????????
4920 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668745 ????????????????????????????????????????????????????????????
4980 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668805 ????????????????????????????????????????????????????????????
5040 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668865 ????????????????????????????????????????????????????????????
5100 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668925 ????????????????????????????????????????????????????????????
5160 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48668985 ????????????????????????????????????????????????????????????
5220 ------------------------------------------------------------
NR_111921 132 ------------------------------------------------------------
chr3 48669045 ?????????????????????????????????????????????????????AGACGGG
5280 -----------------------------------------------------|||||||
NR_111921 132 -----------------------------------------------------AGACGGG
chr3 48669105 AGCTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTAT
5340 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NR_111921 139 AGCTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTAT
chr3 48669165 ATTAAAAAA 48669174
5400 ||||||||| 5409
NR_111921 199 ATTAAAAAA 208
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
NR_111921.1_modified 0 chr3 48663768 0 3S28M1D17M1827N76M2I6M3376N76M12H * 0 0 AAACACGAGAGGAGCGGAGGCGAGGGGTGAAGCGGAGCACTCCAATCGCTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGCTGCTGCCCGGCAGATGGAGCGATCAGAAGCCACCAGGAGACGGGAGCTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTATATTAAAAAA * AS:i:972 NM:i:5
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (203 aligned letters; 201 identities; 2 mismatches; 5206 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 203:
identities = 201,
mismatches = 2.
gaps = 5206:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 5206:
internal_insertions = 2:
open_internal_insertions = 1,
extend_internal_insertions = 1;
internal_deletions = 5204:
open_internal_deletions = 3,
extend_internal_deletions = 5201;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 2)
self.assertEqual(counts.internal_deletions, 5204)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 5206)
self.assertEqual(counts.insertions, 2)
self.assertEqual(counts.deletions, 5204)
self.assertEqual(counts.gaps, 5206)
self.assertEqual(counts.aligned, 203)
self.assertRaises(StopIteration, next, alignments)
def test_reading(self):
    """Test parsing dna_rna.sam, with and without a context manager."""
    sam_file = "Blat/dna_rna.sam"

    # Plain parser object first, then the result of calling iter() on it;
    # both must pass the same checks.
    parsed = Align.parse(sam_file, "sam")
    self.check_alignments(parsed)
    parsed = iter(parsed)
    self.check_alignments(parsed)

    # Parser used as a context manager and read inside the block.
    with Align.parse(sam_file, "sam") as parsed:
        self.check_alignments(parsed)
    # Once the block has exited, the private _stream attribute is expected
    # to be unavailable.
    with self.assertRaises(AttributeError):
        parsed._stream

    # Same expectation when nothing was read inside the block.
    with Align.parse(sam_file, "sam") as parsed:
        pass
    with self.assertRaises(AttributeError):
        parsed._stream
def test_reading_psl_comparison(self):
    """Test parsing dna_rna.sam and comparing to dna_rna.psl.

    Alignments parsed from the SAM file are paired, in iteration order, with
    alignments parsed from the PSL file. Each pair must agree on the target
    id, the query id and the coordinates, and both files must yield the same
    number of alignments.
    """
    # Local import: keeps this test self-contained with respect to the
    # module-level import block.
    from itertools import zip_longest

    sam_alignments = Align.parse("Blat/dna_rna.sam", "sam")
    psl_alignments = Align.parse("Blat/dna_rna.psl", "psl")
    # A plain zip() stops at the shorter input, so a parser yielding too few
    # (or no) alignments would go unnoticed. zip_longest pads the shorter
    # input with None, which is then reported as a failure below.
    pairs = zip_longest(sam_alignments, psl_alignments)
    for index, (sam_alignment, psl_alignment) in enumerate(pairs):
        self.assertIsNotNone(
            sam_alignment,
            f"dna_rna.sam yielded only {index} alignments; dna_rna.psl has more",
        )
        self.assertIsNotNone(
            psl_alignment,
            f"dna_rna.psl yielded only {index} alignments; dna_rna.sam has more",
        )
        self.assertEqual(sam_alignment.target.id, psl_alignment.target.id)
        self.assertEqual(sam_alignment.query.id, psl_alignment.query.id)
        self.assertTrue(
            np.array_equal(sam_alignment.coordinates, psl_alignment.coordinates)
        )
def test_writing(self):
    """Test writing the alignments in dna_rna.sam.

    The alignments are parsed from Blat/dna_rna.sam, written in SAM format
    (with md=True) to an in-memory stream, and parsed back from that stream;
    the re-read alignments must pass check_alignments.
    """
    path = "Blat/dna_rna.sam"
    alignments = Align.parse(path, "sam")
    # The context manager closes the buffer even if an assertion below fails.
    with StringIO() as stream:
        n = Align.write(alignments, stream, "sam", md=True)
        self.assertEqual(n, 4)
        # Rewind so that the parser reads what was just written.
        stream.seek(0)
        alignments = Align.parse(stream, "sam")
        self.check_alignments(alignments)
class TestAlign_dna(unittest.TestCase):
# The SAM files were generated using these commands:
# twoBitInfo hg19.2bit stdout | grep -v chrUn | grep -v _random | grep -v _hap | sort -n -k 2 -r > hg19.chrom.sizes
# psl2sam.pl psl_34_001.psl | samtools view -h -t hg19.chrom.sizes - > psl_34_001.sam
# psl2sam.pl psl_34_003.psl | samtools view -h -t hg19.chrom.sizes - > psl_34_003.sam
# psl2sam.pl psl_34_004.psl | samtools view -h -t hg19.chrom.sizes - > psl_34_004.sam
# psl2sam.pl psl_34_005.psl | samtools view -h -t hg19.chrom.sizes - > psl_34_005.sam
# Note that psl_34_002 was not included as the SAM format no longer allows
# an empty SAM file.
# The hard clipping symbols H were replaced by soft clipping symbols S in
# the file psl_34_005.sam.
def check_alignments_psl_34_001(self, alignments):
"""Check the alignments for psl_34_001/sam."""
self.assertEqual(list(alignments.metadata), ["PG"])
self.assertEqual(len(alignments.targets), 25)
self.assertEqual(alignments.targets[0].id, "chr1")
self.assertEqual(len(alignments.targets[0]), 249250621)
self.assertEqual(alignments.targets[1].id, "chr2")
self.assertEqual(len(alignments.targets[1]), 243199373)
self.assertEqual(alignments.targets[2].id, "chr3")
self.assertEqual(len(alignments.targets[2]), 198022430)
self.assertEqual(alignments.targets[3].id, "chr4")
self.assertEqual(len(alignments.targets[3]), 191154276)
self.assertEqual(alignments.targets[4].id, "chr5")
self.assertEqual(len(alignments.targets[4]), 180915260)
self.assertEqual(alignments.targets[5].id, "chr6")
self.assertEqual(len(alignments.targets[5]), 171115067)
self.assertEqual(alignments.targets[6].id, "chr7")
self.assertEqual(len(alignments.targets[6]), 159138663)
self.assertEqual(alignments.targets[7].id, "chrX")
self.assertEqual(len(alignments.targets[7]), 155270560)
self.assertEqual(alignments.targets[8].id, "chr8")
self.assertEqual(len(alignments.targets[8]), 146364022)
self.assertEqual(alignments.targets[9].id, "chr9")
self.assertEqual(len(alignments.targets[9]), 141213431)
self.assertEqual(alignments.targets[10].id, "chr10")
self.assertEqual(len(alignments.targets[10]), 135534747)
self.assertEqual(alignments.targets[11].id, "chr11")
self.assertEqual(len(alignments.targets[11]), 135006516)
self.assertEqual(alignments.targets[12].id, "chr12")
self.assertEqual(len(alignments.targets[12]), 133851895)
self.assertEqual(alignments.targets[13].id, "chr13")
self.assertEqual(len(alignments.targets[13]), 115169878)
self.assertEqual(alignments.targets[14].id, "chr14")
self.assertEqual(len(alignments.targets[14]), 107349540)
self.assertEqual(alignments.targets[15].id, "chr15")
self.assertEqual(len(alignments.targets[15]), 102531392)
self.assertEqual(alignments.targets[16].id, "chr16")
self.assertEqual(len(alignments.targets[16]), 90354753)
self.assertEqual(alignments.targets[17].id, "chr17")
self.assertEqual(len(alignments.targets[17]), 81195210)
self.assertEqual(alignments.targets[18].id, "chr18")
self.assertEqual(len(alignments.targets[18]), 78077248)
self.assertEqual(alignments.targets[19].id, "chr20")
self.assertEqual(len(alignments.targets[19]), 63025520)
self.assertEqual(alignments.targets[20].id, "chrY")
self.assertEqual(len(alignments.targets[20]), 59373566)
self.assertEqual(alignments.targets[21].id, "chr19")
self.assertEqual(len(alignments.targets[21]), 59128983)
self.assertEqual(alignments.targets[22].id, "chr22")
self.assertEqual(len(alignments.targets[22]), 51304566)
self.assertEqual(alignments.targets[23].id, "chr21")
self.assertEqual(len(alignments.targets[23]), 48129895)
self.assertEqual(alignments.targets[24].id, "chrM")
self.assertEqual(len(alignments.targets[24]), 16571)
self.assertEqual(len(alignments.metadata["PG"]), 1)
self.assertEqual(
alignments.metadata["PG"][0],
{
"ID": "samtools",
"PN": "samtools",
"VN": "1.14",
"CL": "samtools view -h -t hg19.chrom.sizes -",
},
)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 16))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertEqual(len(alignment.target.seq), 191154276)
self.assertEqual(len(alignment.query.seq), 16)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61646095, 61646111],
[ 0, 16]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr4 61646095 ???????????????? 61646111
0 |||||||||||||||| 16
hg18_dna 0 ???????????????? 16
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg18_dna 0 chr4 61646096 0 11H16M6H * 0 0 * * AS:i:16
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (16 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 16:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 16)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 33))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 33)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[10271783, 10271816],
[ 0, 33]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 10271783 ????????????????????????????????? 10271816
0 ||||||||||||||||||||||||||||||||| 33
hg18_dna 0 ????????????????????????????????? 33
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg18_dna 0 chr1 10271784 0 33M * 0 0 * * AS:i:33
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (33 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 33:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 33)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 17))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 17)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[53575980, 53575997],
[ 17, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr2 53575980 ????????????????? 53575997
0 ||||||||||||||||| 17
hg18_dna 17 ????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg18_dna 16 chr2 53575981 0 8H17M8H * 0 0 * * AS:i:17
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (17 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 17:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 17)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 41))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr9")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 141213431)
self.assertEqual(len(alignment.query.seq), 41)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[85737865, 85737906],
[ 0, 41]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr9 85737865 ????????????????????????????????????????? 85737906
0 ||||||||||||||||||||||||||||||||||||||||| 41
hg19_dna 0 ????????????????????????????????????????? 41
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr9 85737866 0 9H41M * 0 0 * * AS:i:29
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 41))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr8")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 146364022)
self.assertEqual(len(alignment.query.seq), 41)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[95160479, 95160520],
[ 0, 41]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr8 95160479 ????????????????????????????????????????? 95160520
0 ||||||||||||||||||||||||||||||||||||||||| 41
hg19_dna 0 ????????????????????????????????????????? 41
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr8 95160480 0 8H41M1H * 0 0 * * AS:i:41
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 51304566)
self.assertEqual(len(alignment.query.seq), 36)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42144400, 42144436],
[ 0, 36]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr22 42144400 ???????????????????????????????????? 42144436
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 0 ???????????????????????????????????? 36
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr22 42144401 0 11H36M3H * 0 0 * * AS:i:24
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 48)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[183925984, 183925990, 183925990, 183926028],
[ 0, 6, 10, 48]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr2 183925984 ??????----?????????????????????????????????????? 183926028
0 ||||||----|||||||||||||||||||||||||||||||||||||| 48
hg19_dna 0 ???????????????????????????????????????????????? 48
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr2 183925985 0 1H6M4I38M1H * 0 0 * * AS:i:27
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (44 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 44:
identities = 0,
mismatches = 0.
gaps = 4:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 4:
internal_insertions = 4:
open_internal_insertions = 1,
extend_internal_insertions = 3;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 4)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 4)
self.assertEqual(counts.insertions, 4)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 4)
self.assertEqual(counts.aligned, 44)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 170))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 36)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[35483340, 35483365, 35483499, 35483510],
[ 0, 25, 25, 36]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr19 35483340 ????????????????????????????????????????????????????????????
0 |||||||||||||||||||||||||-----------------------------------
hg19_dna 0 ?????????????????????????-----------------------------------
chr19 35483400 ????????????????????????????????????????????????????????????
60 ------------------------------------------------------------
hg19_dna 25 ------------------------------------------------------------
chr19 35483460 ?????????????????????????????????????????????????? 35483510
120 ---------------------------------------||||||||||| 170
hg19_dna 25 ---------------------------------------??????????? 36
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr19 35483341 0 10H25M134D11M4H * 0 0 * * AS:i:0
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 134 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 134:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 134:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 134:
open_internal_deletions = 1,
extend_internal_deletions = 133;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 134)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 134)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 134)
self.assertEqual(counts.gaps, 134)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 78077248)
self.assertEqual(len(alignment.query.seq), 39)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[23891310, 23891349],
[ 0, 39]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr18 23891310 ??????????????????????????????????????? 23891349
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 0 ??????????????????????????????????????? 39
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr18 23891311 0 10H39M1H * 0 0 * * AS:i:39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 28))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 78077248)
self.assertEqual(len(alignment.query.seq), 28)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[43252217, 43252245],
[ 0, 28]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr18 43252217 ???????????????????????????? 43252245
0 |||||||||||||||||||||||||||| 28
hg19_dna 0 ???????????????????????????? 28
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr18 43252218 0 21H28M1H * 0 0 * * AS:i:24
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 51))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr13")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 115169878)
self.assertEqual(len(alignment.query.seq), 48)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[52759147, 52759157, 52759160, 52759198],
[ 0, 10, 10, 48]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr13 52759147 ??????????????????????????????????????????????????? 52759198
0 ||||||||||---|||||||||||||||||||||||||||||||||||||| 51
hg19_dna 0 ??????????---?????????????????????????????????????? 48
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr13 52759148 0 1H10M3D38M1H * 0 0 * * AS:i:30
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (48 aligned letters; 0 identities; 0 mismatches; 3 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 48:
identities = 0,
mismatches = 0.
gaps = 3:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 3:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 3:
open_internal_deletions = 1,
extend_internal_deletions = 2;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 3)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 3)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 3)
self.assertEqual(counts.gaps, 3)
self.assertEqual(counts.aligned, 48)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 50))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[1207056, 1207106],
[ 0, 50]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 1207056 ?????????????????????????????????????????????????? 1207106
0 |||||||||||||||||||||||||||||||||||||||||||||||||| 50
hg19_dna 0 ?????????????????????????????????????????????????? 50
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr1 1207057 0 50M * 0 0 * * AS:i:50
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (50 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 50:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 50)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 34)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61700837, 61700871],
[ 0, 34]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 61700837 ?????????????????????????????????? 61700871
0 |||||||||||||||||||||||||||||||||| 34
hg19_dna 0 ?????????????????????????????????? 34
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr1 61700838 0 1H34M15H * 0 0 * * AS:i:22
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 38))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 191154276)
self.assertEqual(len(alignment.query.seq), 38)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[37558157, 37558173, 37558173, 37558191],
[ 38, 22, 18, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr4 37558157 ????????????????----?????????????????? 37558191
0 ||||||||||||||||----|||||||||||||||||| 38
hg19_dna 38 ?????????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr4 37558158 0 1H16M4I18M11H * 0 0 * * AS:i:15
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 4:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 4:
internal_insertions = 4:
open_internal_insertions = 1,
extend_internal_insertions = 3;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 4)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 4)
self.assertEqual(counts.insertions, 4)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 4)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 51304566)
self.assertEqual(len(alignment.query.seq), 37)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48997405, 48997442],
[ 37, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr22 48997405 ????????????????????????????????????? 48997442
0 ||||||||||||||||||||||||||||||||||||| 37
hg19_dna 37 ????????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr22 48997406 0 1H37M12H * 0 0 * * AS:i:29
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (37 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 37:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 37)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 36)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[120641740, 120641776],
[ 36, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr2 120641740 ???????????????????????????????????? 120641776
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 36 ???????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr2 120641741 0 1H36M13H * 0 0 * * AS:i:32
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 39)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[54017130, 54017169],
[ 39, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr19 54017130 ??????????????????????????????????????? 54017169
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 39 ??????????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr19 54017131 0 1H39M10H * 0 0 * * AS:i:39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 39)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[553742, 553781],
[ 39, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr19 553742 ??????????????????????????????????????? 553781
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 39 ??????????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr19 553743 0 1H39M10H * 0 0 * * AS:i:27
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 135534747)
self.assertEqual(len(alignment.query.seq), 36)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[99388555, 99388591],
[ 36, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr10 99388555 ???????????????????????????????????? 99388591
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 36 ???????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr10 99388556 0 1H36M13H * 0 0 * * AS:i:24
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 25))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 135534747)
self.assertEqual(len(alignment.query.seq), 25)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[112178171, 112178196],
[ 25, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr10 112178171 ????????????????????????? 112178196
0 ||||||||||||||||||||||||| 25
hg19_dna 25 ????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr10 112178172 0 15H25M10H * 0 0 * * AS:i:21
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (25 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 25:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 25)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 36)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[39368490, 39368526],
[ 36, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 39368490 ???????????????????????????????????? 39368526
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 36 ???????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr1 39368491 0 1H36M13H * 0 0 * * AS:i:32
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 34)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[220325687, 220325721],
[ 34, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 220325687 ?????????????????????????????????? 220325721
0 |||||||||||||||||||||||||||||||||| 34
hg19_dna 34 ?????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr1 220325688 0 3H34M13H * 0 0 * * AS:i:30
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
self.assertRaises(StopIteration, next, alignments)
def test_reading_psl_34_001(self):
    """Parse Blat/psl_34_001.sam as SAM and check the resulting alignments."""
    parsed = Align.parse("Blat/psl_34_001.sam", "sam")
    self.check_alignments_psl_34_001(parsed)
def test_writing_psl_34_001(self):
    """Test writing the alignments in psl_34_001.sam.

    The alignments parsed from Blat/psl_34_001.sam are written in SAM
    format to an in-memory stream.  The stream is then rewound, parsed
    again, and the round-tripped alignments are verified with
    check_alignments_psl_34_001.
    """
    path = "Blat/psl_34_001.sam"
    alignments = Align.parse(path, "sam")
    # Use a context manager so that the stream is closed even when one of
    # the assertions below fails.
    with StringIO() as stream:
        n = Align.write(alignments, stream, "sam")
        # Value returned by Align.write (presumably the number of
        # alignments written).
        self.assertEqual(n, 22)
        # Rewind so that the data just written can be parsed again.
        stream.seek(0)
        alignments = Align.parse(stream, "sam")
        self.check_alignments_psl_34_001(alignments)
def check_alignments_psl_34_003(self, alignments):
    """Check the alignments for psl_34_003.sam.

    Verifies the header information exposed by ``alignments`` (a single
    "PG" metadata record and 25 target sequences), then consumes three
    alignments from ``alignments`` and checks the shape, strand
    orientation, sequences, coordinates, printed form, SAM line and
    counts of each one.  Finally asserts that no further alignment is
    available.
    """
    # Expected (id, length) of each target, in the order in which they
    # are stored in alignments.targets.
    expected_targets = (
        ("chr1", 249250621),
        ("chr2", 243199373),
        ("chr3", 198022430),
        ("chr4", 191154276),
        ("chr5", 180915260),
        ("chr6", 171115067),
        ("chr7", 159138663),
        ("chrX", 155270560),
        ("chr8", 146364022),
        ("chr9", 141213431),
        ("chr10", 135534747),
        ("chr11", 135006516),
        ("chr12", 133851895),
        ("chr13", 115169878),
        ("chr14", 107349540),
        ("chr15", 102531392),
        ("chr16", 90354753),
        ("chr17", 81195210),
        ("chr18", 78077248),
        ("chr20", 63025520),
        ("chrY", 59373566),
        ("chr19", 59128983),
        ("chr22", 51304566),
        ("chr21", 48129895),
        ("chrM", 16571),
    )
    # Counters that are expected to be zero for every alignment checked
    # here, listed in the order in which they are verified.
    gap_counters = (
        "left_insertions",
        "left_deletions",
        "right_insertions",
        "right_deletions",
        "internal_insertions",
        "internal_deletions",
        "left_gaps",
        "right_gaps",
        "internal_gaps",
        "insertions",
        "deletions",
        "gaps",
    )

    def assert_gap_free(counts, aligned):
        """Assert that all gap counters are 0 and counts.aligned == aligned."""
        for name in gap_counters:
            # msg identifies the offending counter if the check fails.
            self.assertEqual(getattr(counts, name), 0, msg=name)
        self.assertEqual(counts.aligned, aligned)

    self.assertEqual(list(alignments.metadata), ["PG"])
    self.assertEqual(len(alignments.targets), len(expected_targets))
    for index, (name, length) in enumerate(expected_targets):
        target = alignments.targets[index]
        self.assertEqual(target.id, name, msg="targets[%d]" % index)
        self.assertEqual(len(target), length, msg="targets[%d]" % index)
    self.assertEqual(len(alignments.metadata["PG"]), 1)
    self.assertEqual(
        alignments.metadata["PG"][0],
        {
            "ID": "samtools",
            "PN": "samtools",
            "VN": "1.14",
            "CL": "samtools view -h -t hg19.chrom.sizes -",
        },
    )
    # First alignment: target and query coordinates both increase.
    alignment = next(alignments)
    self.assertEqual(alignment.shape, (2, 16))
    self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
    self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
    self.assertEqual(len(alignment), 2)
    self.assertIs(alignment.sequences[0], alignment.target)
    self.assertIs(alignment.sequences[1], alignment.query)
    self.assertEqual(alignment.target.id, "chr4")
    self.assertEqual(alignment.query.id, "hg18_dna")
    self.assertEqual(len(alignment.target.seq), 191154276)
    self.assertEqual(len(alignment.query.seq), 16)
    self.assertTrue(
        np.array_equal(
            alignment.coordinates,
            # fmt: off
            np.array([[61646095, 61646111],
                      [ 0, 16]]),
            # fmt: on
        )
    )
    self.assertEqual(
        str(alignment),
        """\
chr4 61646095 ???????????????? 61646111
0 |||||||||||||||| 16
hg18_dna 0 ???????????????? 16
""",
    )
    self.assertEqual(
        format(alignment, "sam"),
        """\
hg18_dna 0 chr4 61646096 0 11H16M6H * 0 0 * * AS:i:16
""",
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (16 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 16:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    assert_gap_free(counts, 16)
    # Second alignment: target and query coordinates both increase.
    alignment = next(alignments)
    self.assertEqual(alignment.shape, (2, 33))
    self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
    self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
    self.assertEqual(len(alignment), 2)
    self.assertIs(alignment.sequences[0], alignment.target)
    self.assertIs(alignment.sequences[1], alignment.query)
    self.assertEqual(alignment.target.id, "chr1")
    self.assertEqual(alignment.query.id, "hg18_dna")
    self.assertEqual(len(alignment.target.seq), 249250621)
    self.assertEqual(len(alignment.query.seq), 33)
    self.assertTrue(
        np.array_equal(
            alignment.coordinates,
            # fmt: off
            np.array([[10271783, 10271816],
                      [ 0, 33]]),
            # fmt: on
        )
    )
    self.assertEqual(
        str(alignment),
        """\
chr1 10271783 ????????????????????????????????? 10271816
0 ||||||||||||||||||||||||||||||||| 33
hg18_dna 0 ????????????????????????????????? 33
""",
    )
    self.assertEqual(
        format(alignment, "sam"),
        """\
hg18_dna 0 chr1 10271784 0 33M * 0 0 * * AS:i:33
""",
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (33 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 33:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    assert_gap_free(counts, 33)
    # Third alignment: query coordinates decrease while target
    # coordinates increase.
    alignment = next(alignments)
    self.assertEqual(alignment.shape, (2, 17))
    self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
    self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
    self.assertEqual(len(alignment), 2)
    self.assertIs(alignment.sequences[0], alignment.target)
    self.assertIs(alignment.sequences[1], alignment.query)
    self.assertEqual(alignment.target.id, "chr2")
    self.assertEqual(alignment.query.id, "hg18_dna")
    self.assertEqual(len(alignment.target.seq), 243199373)
    self.assertEqual(len(alignment.query.seq), 17)
    self.assertTrue(
        np.array_equal(
            alignment.coordinates,
            # fmt: off
            np.array([[53575980, 53575997],
                      [ 17, 0]]),
            # fmt: on
        )
    )
    self.assertEqual(
        str(alignment),
        """\
chr2 53575980 ????????????????? 53575997
0 ||||||||||||||||| 17
hg18_dna 17 ????????????????? 0
""",
    )
    self.assertEqual(
        format(alignment, "sam"),
        """\
hg18_dna 16 chr2 53575981 0 8H17M8H * 0 0 * * AS:i:17
""",
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (17 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 17:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    assert_gap_free(counts, 17)
    # No further alignments are expected.
    self.assertRaises(StopIteration, next, alignments)
def test_reading_psl_34_003(self):
    """Parse Blat/psl_34_003.sam as SAM and check the resulting alignments."""
    parsed = Align.parse("Blat/psl_34_003.sam", "sam")
    self.check_alignments_psl_34_003(parsed)
def test_writing_psl_34_003(self):
    """Test writing the alignments in psl_34_003.sam.

    The alignments parsed from Blat/psl_34_003.sam are written in SAM
    format to an in-memory stream.  The stream is then rewound, parsed
    again, and the round-tripped alignments are verified with
    check_alignments_psl_34_003.
    """
    path = "Blat/psl_34_003.sam"
    alignments = Align.parse(path, "sam")
    # Use a context manager so that the stream is closed even when one of
    # the assertions below fails.
    with StringIO() as stream:
        n = Align.write(alignments, stream, "sam")
        # Value returned by Align.write (presumably the number of
        # alignments written).
        self.assertEqual(n, 3)
        # Rewind so that the data just written can be parsed again.
        stream.seek(0)
        alignments = Align.parse(stream, "sam")
        self.check_alignments_psl_34_003(alignments)
def check_alignments_psl_34_004(self, alignments):
"""Check the alignments for psl_34_004/sam."""
self.assertEqual(list(alignments.metadata), ["PG"])
self.assertEqual(len(alignments.targets), 25)
self.assertEqual(alignments.targets[0].id, "chr1")
self.assertEqual(len(alignments.targets[0]), 249250621)
self.assertEqual(alignments.targets[1].id, "chr2")
self.assertEqual(len(alignments.targets[1]), 243199373)
self.assertEqual(alignments.targets[2].id, "chr3")
self.assertEqual(len(alignments.targets[2]), 198022430)
self.assertEqual(alignments.targets[3].id, "chr4")
self.assertEqual(len(alignments.targets[3]), 191154276)
self.assertEqual(alignments.targets[4].id, "chr5")
self.assertEqual(len(alignments.targets[4]), 180915260)
self.assertEqual(alignments.targets[5].id, "chr6")
self.assertEqual(len(alignments.targets[5]), 171115067)
self.assertEqual(alignments.targets[6].id, "chr7")
self.assertEqual(len(alignments.targets[6]), 159138663)
self.assertEqual(alignments.targets[7].id, "chrX")
self.assertEqual(len(alignments.targets[7]), 155270560)
self.assertEqual(alignments.targets[8].id, "chr8")
self.assertEqual(len(alignments.targets[8]), 146364022)
self.assertEqual(alignments.targets[9].id, "chr9")
self.assertEqual(len(alignments.targets[9]), 141213431)
self.assertEqual(alignments.targets[10].id, "chr10")
self.assertEqual(len(alignments.targets[10]), 135534747)
self.assertEqual(alignments.targets[11].id, "chr11")
self.assertEqual(len(alignments.targets[11]), 135006516)
self.assertEqual(alignments.targets[12].id, "chr12")
self.assertEqual(len(alignments.targets[12]), 133851895)
self.assertEqual(alignments.targets[13].id, "chr13")
self.assertEqual(len(alignments.targets[13]), 115169878)
self.assertEqual(alignments.targets[14].id, "chr14")
self.assertEqual(len(alignments.targets[14]), 107349540)
self.assertEqual(alignments.targets[15].id, "chr15")
self.assertEqual(len(alignments.targets[15]), 102531392)
self.assertEqual(alignments.targets[16].id, "chr16")
self.assertEqual(len(alignments.targets[16]), 90354753)
self.assertEqual(alignments.targets[17].id, "chr17")
self.assertEqual(len(alignments.targets[17]), 81195210)
self.assertEqual(alignments.targets[18].id, "chr18")
self.assertEqual(len(alignments.targets[18]), 78077248)
self.assertEqual(alignments.targets[19].id, "chr20")
self.assertEqual(len(alignments.targets[19]), 63025520)
self.assertEqual(alignments.targets[20].id, "chrY")
self.assertEqual(len(alignments.targets[20]), 59373566)
self.assertEqual(alignments.targets[21].id, "chr19")
self.assertEqual(len(alignments.targets[21]), 59128983)
self.assertEqual(alignments.targets[22].id, "chr22")
self.assertEqual(len(alignments.targets[22]), 51304566)
self.assertEqual(alignments.targets[23].id, "chr21")
self.assertEqual(len(alignments.targets[23]), 48129895)
self.assertEqual(alignments.targets[24].id, "chrM")
self.assertEqual(len(alignments.targets[24]), 16571)
self.assertEqual(len(alignments.targets), 25)
self.assertEqual(len(alignments.metadata["PG"]), 1)
self.assertEqual(
alignments.metadata["PG"][0],
{
"ID": "samtools",
"PN": "samtools",
"VN": "1.14",
"CL": "samtools view -h -t hg19.chrom.sizes -",
},
)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 41))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr9")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 141213431)
self.assertEqual(len(alignment.query.seq), 41)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[85737865, 85737906],
[ 0, 41]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr9 85737865 ????????????????????????????????????????? 85737906
0 ||||||||||||||||||||||||||||||||||||||||| 41
hg19_dna 0 ????????????????????????????????????????? 41
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr9 85737866 0 9H41M * 0 0 * * AS:i:29
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 41))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr8")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 146364022)
self.assertEqual(len(alignment.query.seq), 41)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[95160479, 95160520],
[ 0, 41]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr8 95160479 ????????????????????????????????????????? 95160520
0 ||||||||||||||||||||||||||||||||||||||||| 41
hg19_dna 0 ????????????????????????????????????????? 41
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr8 95160480 0 8H41M1H * 0 0 * * AS:i:41
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 51304566)
self.assertEqual(len(alignment.query.seq), 36)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42144400, 42144436],
[ 0, 36]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr22 42144400 ???????????????????????????????????? 42144436
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 0 ???????????????????????????????????? 36
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr22 42144401 0 11H36M3H * 0 0 * * AS:i:24
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 48))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 48)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[183925984, 183925990, 183925990, 183926028],
[ 0, 6, 10, 48]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr2 183925984 ??????----?????????????????????????????????????? 183926028
0 ||||||----|||||||||||||||||||||||||||||||||||||| 48
hg19_dna 0 ???????????????????????????????????????????????? 48
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr2 183925985 0 1H6M4I38M1H * 0 0 * * AS:i:27
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (44 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 44:
identities = 0,
mismatches = 0.
gaps = 4:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 4:
internal_insertions = 4:
open_internal_insertions = 1,
extend_internal_insertions = 3;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 4)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 4)
self.assertEqual(counts.insertions, 4)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 4)
self.assertEqual(counts.aligned, 44)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 170))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 36)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[35483340, 35483365, 35483499, 35483510],
[ 0, 25, 25, 36]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr19 35483340 ????????????????????????????????????????????????????????????
0 |||||||||||||||||||||||||-----------------------------------
hg19_dna 0 ?????????????????????????-----------------------------------
chr19 35483400 ????????????????????????????????????????????????????????????
60 ------------------------------------------------------------
hg19_dna 25 ------------------------------------------------------------
chr19 35483460 ?????????????????????????????????????????????????? 35483510
120 ---------------------------------------||||||||||| 170
hg19_dna 25 ---------------------------------------??????????? 36
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr19 35483341 0 10H25M134D11M4H * 0 0 * * AS:i:0
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 134 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 134:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 134:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 134:
open_internal_deletions = 1,
extend_internal_deletions = 133;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 134)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 134)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 134)
self.assertEqual(counts.gaps, 134)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 78077248)
self.assertEqual(len(alignment.query.seq), 39)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[23891310, 23891349],
[ 0, 39]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr18 23891310 ??????????????????????????????????????? 23891349
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 0 ??????????????????????????????????????? 39
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr18 23891311 0 10H39M1H * 0 0 * * AS:i:39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 28))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 78077248)
self.assertEqual(len(alignment.query.seq), 28)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[43252217, 43252245],
[ 0, 28]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr18 43252217 ???????????????????????????? 43252245
0 |||||||||||||||||||||||||||| 28
hg19_dna 0 ???????????????????????????? 28
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr18 43252218 0 21H28M1H * 0 0 * * AS:i:24
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 51))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr13")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 115169878)
self.assertEqual(len(alignment.query.seq), 48)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[52759147, 52759157, 52759160, 52759198],
[ 0, 10, 10, 48]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr13 52759147 ??????????????????????????????????????????????????? 52759198
0 ||||||||||---|||||||||||||||||||||||||||||||||||||| 51
hg19_dna 0 ??????????---?????????????????????????????????????? 48
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr13 52759148 0 1H10M3D38M1H * 0 0 * * AS:i:30
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (48 aligned letters; 0 identities; 0 mismatches; 3 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 48:
identities = 0,
mismatches = 0.
gaps = 3:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 3:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 3:
open_internal_deletions = 1,
extend_internal_deletions = 2;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 3)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 3)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 3)
self.assertEqual(counts.gaps, 3)
self.assertEqual(counts.aligned, 48)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 50))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[1207056, 1207106],
[ 0, 50]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 1207056 ?????????????????????????????????????????????????? 1207106
0 |||||||||||||||||||||||||||||||||||||||||||||||||| 50
hg19_dna 0 ?????????????????????????????????????????????????? 50
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr1 1207057 0 50M * 0 0 * * AS:i:50
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (50 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 50:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 50)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 34)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61700837, 61700871],
[ 0, 34]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 61700837 ?????????????????????????????????? 61700871
0 |||||||||||||||||||||||||||||||||| 34
hg19_dna 0 ?????????????????????????????????? 34
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr1 61700838 0 1H34M15H * 0 0 * * AS:i:22
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 38))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 191154276)
self.assertEqual(len(alignment.query.seq), 38)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[37558157, 37558173, 37558173, 37558191],
[ 38, 22, 18, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr4 37558157 ????????????????----?????????????????? 37558191
0 ||||||||||||||||----|||||||||||||||||| 38
hg19_dna 38 ?????????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr4 37558158 0 1H16M4I18M11H * 0 0 * * AS:i:15
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 4:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 4:
internal_insertions = 4:
open_internal_insertions = 1,
extend_internal_insertions = 3;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 4)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 4)
self.assertEqual(counts.insertions, 4)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 4)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 37))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 51304566)
self.assertEqual(len(alignment.query.seq), 37)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48997405, 48997442],
[ 37, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr22 48997405 ????????????????????????????????????? 48997442
0 ||||||||||||||||||||||||||||||||||||| 37
hg19_dna 37 ????????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr22 48997406 0 1H37M12H * 0 0 * * AS:i:29
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (37 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 37:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 37)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 36)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[120641740, 120641776],
[ 36, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr2 120641740 ???????????????????????????????????? 120641776
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 36 ???????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr2 120641741 0 1H36M13H * 0 0 * * AS:i:32
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 39)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[54017130, 54017169],
[ 39, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr19 54017130 ??????????????????????????????????????? 54017169
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 39 ??????????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr19 54017131 0 1H39M10H * 0 0 * * AS:i:39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 39)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[553742, 553781],
[ 39, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr19 553742 ??????????????????????????????????????? 553781
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 39 ??????????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr19 553743 0 1H39M10H * 0 0 * * AS:i:27
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 135534747)
self.assertEqual(len(alignment.query.seq), 36)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[99388555, 99388591],
[ 36, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr10 99388555 ???????????????????????????????????? 99388591
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 36 ???????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr10 99388556 0 1H36M13H * 0 0 * * AS:i:24
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 25))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 135534747)
self.assertEqual(len(alignment.query.seq), 25)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[112178171, 112178196],
[ 25, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr10 112178171 ????????????????????????? 112178196
0 ||||||||||||||||||||||||| 25
hg19_dna 25 ????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr10 112178172 0 15H25M10H * 0 0 * * AS:i:21
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (25 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 25:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 25)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 36)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[39368490, 39368526],
[ 36, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 39368490 ???????????????????????????????????? 39368526
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 36 ???????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr1 39368491 0 1H36M13H * 0 0 * * AS:i:32
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 34)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[220325687, 220325721],
[ 34, 0]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 220325687 ?????????????????????????????????? 220325721
0 |||||||||||||||||||||||||||||||||| 34
hg19_dna 34 ?????????????????????????????????? 0
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr1 220325688 0 3H34M13H * 0 0 * * AS:i:30
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
self.assertRaises(StopIteration, next, alignments)
def test_reading_psl_34_004(self):
    """Test parsing psl_34_004.sam.

    The file is opened with the SAM parser and the resulting alignment
    iterator is passed to check_alignments_psl_34_004 for verification.
    """
    # Align.parse is evaluated first; the checker then consumes the iterator.
    self.check_alignments_psl_34_004(Align.parse("Blat/psl_34_004.sam", "sam"))
def test_writing_psl_34_004(self):
    """Test writing the alignments in psl_34_004.sam.

    The alignments parsed from the file are written in SAM format to an
    in-memory stream, parsed back from that stream, and verified with the
    same checker used for the alignments read directly from the file.
    """
    path = "Blat/psl_34_004.sam"
    alignments = Align.parse(path, "sam")
    # Manage the stream with a context manager so that it is closed even
    # when one of the assertions below fails; an explicit close() at the
    # end of the method would be skipped in that case.
    with StringIO() as stream:
        # Align.write reports how many alignments it wrote.
        n = Align.write(alignments, stream, "sam")
        self.assertEqual(n, 19)
        # Rewind so the parser starts at the beginning of what was written.
        stream.seek(0)
        alignments = Align.parse(stream, "sam")
        self.check_alignments_psl_34_004(alignments)
def check_alignments_psl_34_005(self, alignments):
"""Check the alignments for psl_34_005.sam."""
self.assertEqual(list(alignments.metadata), ["PG"])
self.assertEqual(len(alignments.targets), 25)
self.assertEqual(alignments.targets[0].id, "chr1")
self.assertEqual(len(alignments.targets[0]), 249250621)
self.assertEqual(alignments.targets[1].id, "chr2")
self.assertEqual(len(alignments.targets[1]), 243199373)
self.assertEqual(alignments.targets[2].id, "chr3")
self.assertEqual(len(alignments.targets[2]), 198022430)
self.assertEqual(alignments.targets[3].id, "chr4")
self.assertEqual(len(alignments.targets[3]), 191154276)
self.assertEqual(alignments.targets[4].id, "chr5")
self.assertEqual(len(alignments.targets[4]), 180915260)
self.assertEqual(alignments.targets[5].id, "chr6")
self.assertEqual(len(alignments.targets[5]), 171115067)
self.assertEqual(alignments.targets[6].id, "chr7")
self.assertEqual(len(alignments.targets[6]), 159138663)
self.assertEqual(alignments.targets[7].id, "chrX")
self.assertEqual(len(alignments.targets[7]), 155270560)
self.assertEqual(alignments.targets[8].id, "chr8")
self.assertEqual(len(alignments.targets[8]), 146364022)
self.assertEqual(alignments.targets[9].id, "chr9")
self.assertEqual(len(alignments.targets[9]), 141213431)
self.assertEqual(alignments.targets[10].id, "chr10")
self.assertEqual(len(alignments.targets[10]), 135534747)
self.assertEqual(alignments.targets[11].id, "chr11")
self.assertEqual(len(alignments.targets[11]), 135006516)
self.assertEqual(alignments.targets[12].id, "chr12")
self.assertEqual(len(alignments.targets[12]), 133851895)
self.assertEqual(alignments.targets[13].id, "chr13")
self.assertEqual(len(alignments.targets[13]), 115169878)
self.assertEqual(alignments.targets[14].id, "chr14")
self.assertEqual(len(alignments.targets[14]), 107349540)
self.assertEqual(alignments.targets[15].id, "chr15")
self.assertEqual(len(alignments.targets[15]), 102531392)
self.assertEqual(alignments.targets[16].id, "chr16")
self.assertEqual(len(alignments.targets[16]), 90354753)
self.assertEqual(alignments.targets[17].id, "chr17")
self.assertEqual(len(alignments.targets[17]), 81195210)
self.assertEqual(alignments.targets[18].id, "chr18")
self.assertEqual(len(alignments.targets[18]), 78077248)
self.assertEqual(alignments.targets[19].id, "chr20")
self.assertEqual(len(alignments.targets[19]), 63025520)
self.assertEqual(alignments.targets[20].id, "chrY")
self.assertEqual(len(alignments.targets[20]), 59373566)
self.assertEqual(alignments.targets[21].id, "chr19")
self.assertEqual(len(alignments.targets[21]), 59128983)
self.assertEqual(alignments.targets[22].id, "chr22")
self.assertEqual(len(alignments.targets[22]), 51304566)
self.assertEqual(alignments.targets[23].id, "chr21")
self.assertEqual(len(alignments.targets[23]), 48129895)
self.assertEqual(alignments.targets[24].id, "chrM")
self.assertEqual(len(alignments.targets[24]), 16571)
self.assertEqual(len(alignments.targets), 25)
self.assertEqual(len(alignments.metadata["PG"]), 1)
self.assertEqual(
alignments.metadata["PG"][0],
{
"ID": "samtools",
"PN": "samtools",
"VN": "1.14",
"CL": "samtools view -h -t hg19.chrom.sizes -",
},
)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 16))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertEqual(len(alignment.target.seq), 191154276)
self.assertEqual(len(alignment.query.seq), 33)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61646095, 61646111],
[ 11, 27]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr4 61646095 ???????????????? 61646111
0 |||||||||||||||| 16
hg18_dna 11 ???????????????? 27
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg18_dna 0 chr4 61646096 0 11S16M6S * 0 0 * * AS:i:16
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (16 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 16:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 16)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 33))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 33)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[10271783, 10271816],
[ 0, 33]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 10271783 ????????????????????????????????? 10271816
0 ||||||||||||||||||||||||||||||||| 33
hg18_dna 0 ????????????????????????????????? 33
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg18_dna 0 chr1 10271784 0 33M * 0 0 * * AS:i:33
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (33 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 33:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 33)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 17))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 33)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[53575980, 53575997],
[ 25, 8]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr2 53575980 ????????????????? 53575997
0 ||||||||||||||||| 17
hg18_dna 25 ????????????????? 8
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg18_dna 16 chr2 53575981 0 8S17M8S * 0 0 * * AS:i:17
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (17 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 17:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 17)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 41))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr9")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 141213431)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[85737865, 85737906],
[ 9, 50]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr9 85737865 ????????????????????????????????????????? 85737906
0 ||||||||||||||||||||||||||||||||||||||||| 41
hg19_dna 9 ????????????????????????????????????????? 50
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr9 85737866 0 9S41M * 0 0 * * AS:i:29
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 41))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr8")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 146364022)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[95160479, 95160520],
[ 8, 49]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr8 95160479 ????????????????????????????????????????? 95160520
0 ||||||||||||||||||||||||||||||||||||||||| 41
hg19_dna 8 ????????????????????????????????????????? 49
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr8 95160480 0 8S41M1S * 0 0 * * AS:i:41
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 51304566)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42144400, 42144436],
[ 11, 47]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr22 42144400 ???????????????????????????????????? 42144436
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 11 ???????????????????????????????????? 47
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr22 42144401 0 11S36M3S * 0 0 * * AS:i:24
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 48))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[183925984, 183925990, 183925990, 183926028],
[ 1, 7, 11, 49]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr2 183925984 ??????----?????????????????????????????????????? 183926028
0 ||||||----|||||||||||||||||||||||||||||||||||||| 48
hg19_dna 1 ???????????????????????????????????????????????? 49
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr2 183925985 0 1S6M4I38M1S * 0 0 * * AS:i:27
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (44 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 44:
identities = 0,
mismatches = 0.
gaps = 4:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 4:
internal_insertions = 4:
open_internal_insertions = 1,
extend_internal_insertions = 3;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 4)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 4)
self.assertEqual(counts.insertions, 4)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 4)
self.assertEqual(counts.aligned, 44)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 170))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[35483340, 35483365, 35483499, 35483510],
[ 10, 35, 35, 46]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr19 35483340 ????????????????????????????????????????????????????????????
0 |||||||||||||||||||||||||-----------------------------------
hg19_dna 10 ?????????????????????????-----------------------------------
chr19 35483400 ????????????????????????????????????????????????????????????
60 ------------------------------------------------------------
hg19_dna 35 ------------------------------------------------------------
chr19 35483460 ?????????????????????????????????????????????????? 35483510
120 ---------------------------------------||||||||||| 170
hg19_dna 35 ---------------------------------------??????????? 46
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr19 35483341 0 10S25M134D11M4S * 0 0 * * AS:i:0
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 134 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 134:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 134:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 134:
open_internal_deletions = 1,
extend_internal_deletions = 133;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 134)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 134)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 134)
self.assertEqual(counts.gaps, 134)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 78077248)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[23891310, 23891349],
[ 10, 49]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr18 23891310 ??????????????????????????????????????? 23891349
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 10 ??????????????????????????????????????? 49
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr18 23891311 0 10S39M1S * 0 0 * * AS:i:39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 28))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 78077248)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[43252217, 43252245],
[ 21, 49]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr18 43252217 ???????????????????????????? 43252245
0 |||||||||||||||||||||||||||| 28
hg19_dna 21 ???????????????????????????? 49
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr18 43252218 0 21S28M1S * 0 0 * * AS:i:24
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 51))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr13")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 115169878)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[52759147, 52759157, 52759160, 52759198],
[ 1, 11, 11, 49]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr13 52759147 ??????????????????????????????????????????????????? 52759198
0 ||||||||||---|||||||||||||||||||||||||||||||||||||| 51
hg19_dna 1 ??????????---?????????????????????????????????????? 49
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr13 52759148 0 1S10M3D38M1S * 0 0 * * AS:i:30
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (48 aligned letters; 0 identities; 0 mismatches; 3 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 48:
identities = 0,
mismatches = 0.
gaps = 3:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 3:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 3:
open_internal_deletions = 1,
extend_internal_deletions = 2;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 3)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 3)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 3)
self.assertEqual(counts.gaps, 3)
self.assertEqual(counts.aligned, 48)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 50))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[1207056, 1207106],
[ 0, 50]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 1207056 ?????????????????????????????????????????????????? 1207106
0 |||||||||||||||||||||||||||||||||||||||||||||||||| 50
hg19_dna 0 ?????????????????????????????????????????????????? 50
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr1 1207057 0 50M * 0 0 * * AS:i:50
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (50 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 50:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 50)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61700837, 61700871],
[ 1, 35]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 61700837 ?????????????????????????????????? 61700871
0 |||||||||||||||||||||||||||||||||| 34
hg19_dna 1 ?????????????????????????????????? 35
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 0 chr1 61700838 0 1S34M15S * 0 0 * * AS:i:22
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 38))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 191154276)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[37558157, 37558173, 37558173, 37558191],
[ 49, 33, 29, 11]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr4 37558157 ????????????????----?????????????????? 37558191
0 ||||||||||||||||----|||||||||||||||||| 38
hg19_dna 49 ?????????????????????????????????????? 11
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr4 37558158 0 1S16M4I18M11S * 0 0 * * AS:i:15
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 4:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 4:
internal_insertions = 4:
open_internal_insertions = 1,
extend_internal_insertions = 3;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 4)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 4)
self.assertEqual(counts.insertions, 4)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 4)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 37))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 51304566)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48997405, 48997442],
[ 49, 12]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr22 48997405 ????????????????????????????????????? 48997442
0 ||||||||||||||||||||||||||||||||||||| 37
hg19_dna 49 ????????????????????????????????????? 12
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr22 48997406 0 1S37M12S * 0 0 * * AS:i:29
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (37 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 37:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 37)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 243199373)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[120641740, 120641776],
[ 49, 13]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr2 120641740 ???????????????????????????????????? 120641776
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 49 ???????????????????????????????????? 13
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr2 120641741 0 1S36M13S * 0 0 * * AS:i:32
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[54017130, 54017169],
[ 49, 10]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr19 54017130 ??????????????????????????????????????? 54017169
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 49 ??????????????????????????????????????? 10
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr19 54017131 0 1S39M10S * 0 0 * * AS:i:39
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 59128983)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[553742, 553781],
[ 49, 10]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr19 553742 ??????????????????????????????????????? 553781
0 ||||||||||||||||||||||||||||||||||||||| 39
hg19_dna 49 ??????????????????????????????????????? 10
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr19 553743 0 1S39M10S * 0 0 * * AS:i:27
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 135534747)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[99388555, 99388591],
[ 49, 13]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr10 99388555 ???????????????????????????????????? 99388591
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 49 ???????????????????????????????????? 13
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr10 99388556 0 1S36M13S * 0 0 * * AS:i:24
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 25))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 135534747)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[112178171, 112178196],
[ 35, 10]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr10 112178171 ????????????????????????? 112178196
0 ||||||||||||||||||||||||| 25
hg19_dna 35 ????????????????????????? 10
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr10 112178172 0 15S25M10S * 0 0 * * AS:i:21
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (25 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 25:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 25)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[39368490, 39368526],
[ 49, 13]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 39368490 ???????????????????????????????????? 39368526
0 |||||||||||||||||||||||||||||||||||| 36
hg19_dna 49 ???????????????????????????????????? 13
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr1 39368491 0 1S36M13S * 0 0 * * AS:i:32
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertEqual(len(alignment.target.seq), 249250621)
self.assertEqual(len(alignment.query.seq), 50)
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[220325687, 220325721],
[ 47, 13]]),
# fmt: on
)
)
self.assertEqual(
str(alignment),
"""\
chr1 220325687 ?????????????????????????????????? 220325721
0 |||||||||||||||||||||||||||||||||| 34
hg19_dna 47 ?????????????????????????????????? 13
""",
)
self.assertEqual(
format(alignment, "sam"),
"""\
hg19_dna 16 chr1 220325688 0 3S34M13S * 0 0 * * AS:i:30
""",
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
self.assertRaises(StopIteration, next, alignments)
def test_reading_psl_34_005(self):
    """Test parsing psl_34_005.sam.

    The alignments iterator returned by the SAM parser is passed straight
    to check_alignments_psl_34_005, which performs the actual checks.
    """
    self.check_alignments_psl_34_005(Align.parse("Blat/psl_34_005.sam", "sam"))
def test_writing_psl_34_005(self):
    """Test writing the alignments in psl_34_005.sam.

    The alignments are parsed from file, written in SAM format to an
    in-memory text stream, and parsed back from that stream.  The number
    of alignments written is checked, and the re-parsed alignments are
    verified with check_alignments_psl_34_005.
    """
    path = "Blat/psl_34_005.sam"
    alignments = Align.parse(path, "sam")
    # Use the stream as a context manager so that it is closed even if
    # writing or one of the assertions below raises.
    with StringIO() as stream:
        n = Align.write(alignments, stream, "sam")
        self.assertEqual(n, 22)
        # Rewind so the parser reads what was just written.
        stream.seek(0)
        alignments = Align.parse(stream, "sam")
        self.check_alignments_psl_34_005(alignments)
class TestAlign_sambam(unittest.TestCase):
def test_ex1(self):
    """Count the alignments in SamBam/ex1.sam and check the last one.

    All alignments are consumed from the parser; the total is compared
    against the expected count, after which the fields of the final
    alignment yielded are verified.
    """
    # fmt: off
    expected_quality = [
        18, 11, 11, 11, 28, 28, 28, 21, 28, 28, 28, 27,
        28, 28, 28, 27, 26, 28, 28, 28, 28, 28, 12, 28,
        28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    ]
    # fmt: on
    expected_annotations = {"MF": 18, "Aq": 27, "NM": 2, "UQ": 23, "H0": 0, "H1": 1}
    # count stays 0 if the parser yields nothing at all.
    count = 0
    for count, alignment in enumerate(Align.parse("SamBam/ex1.sam", "sam"), 1):
        pass
    self.assertEqual(count, 3270)
    # From here on, alignment is the last one produced by the parser.
    sequences = alignment.sequences
    self.assertEqual(sequences[0].id, "chr2")
    read = sequences[1]
    self.assertEqual(read.id, "EAS114_26:7:37:79:581")
    self.assertEqual(read.seq, "TTTTCTGGCATGAAAAAAAAAAAAAAAAAAAAAAA")
    self.assertEqual(alignment.flag, 83)
    self.assertEqual(alignment.mapq, 68)
    expected_coordinates = np.array([[1532, 1567], [35, 0]])
    self.assertTrue(np.array_equal(alignment.coordinates, expected_coordinates))
    self.assertEqual(alignment.rnext, "chr2")
    self.assertEqual(alignment.pnext, 1348)
    self.assertEqual(alignment.tlen, -219)
    self.assertEqual(read.letter_annotations["phred_quality"], expected_quality)
    annotations = alignment.annotations
    self.assertEqual(len(annotations), 6)
    for key, value in expected_annotations.items():
        self.assertEqual(annotations[key], value)
def test_ex1_header(self):
    """Check header data, alignment count and last alignment of ex1_header.sam.

    The HD metadata and the two target sequences are verified before
    iterating.  All alignments are then consumed, the total is compared
    against the expected count, and the fields of the final alignment
    yielded are verified.
    """
    # fmt: off
    expected_quality = [
        18, 11, 11, 11, 28, 28, 28, 21, 28, 28, 28, 27,
        28, 28, 28, 27, 26, 28, 28, 28, 28, 28, 12, 28,
        28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    ]
    # fmt: on
    expected_targets = (("chr1", 1575), ("chr2", 1584))
    expected_annotations = {"MF": 18, "Aq": 27, "NM": 2, "UQ": 23, "H0": 0, "H1": 1}
    alignments = Align.parse("SamBam/ex1_header.sam", "sam")
    self.assertEqual(alignments.metadata["HD"], {"VN": "1.3", "SO": "coordinate"})
    targets = alignments.targets
    self.assertEqual(len(targets), 2)
    for target, (name, length) in zip(targets, expected_targets):
        self.assertEqual(target.id, name)
        self.assertEqual(len(target.seq), length)
    # count stays 0 if the parser yields nothing at all.
    count = 0
    for count, alignment in enumerate(alignments, 1):
        pass
    self.assertEqual(count, 3270)
    # From here on, alignment is the last one produced by the parser.
    sequences = alignment.sequences
    self.assertEqual(sequences[0].id, "chr2")
    self.assertEqual(len(sequences[0].seq), 1584)
    read = sequences[1]
    self.assertEqual(read.id, "EAS114_26:7:37:79:581")
    self.assertEqual(read.seq, "TTTTCTGGCATGAAAAAAAAAAAAAAAAAAAAAAA")
    self.assertEqual(alignment.flag, 83)
    self.assertEqual(alignment.mapq, 68)
    expected_coordinates = np.array([[1532, 1567], [35, 0]])
    self.assertTrue(np.array_equal(alignment.coordinates, expected_coordinates))
    self.assertEqual(alignment.rnext, "chr2")
    self.assertEqual(alignment.pnext, 1348)
    self.assertEqual(alignment.tlen, -219)
    self.assertEqual(read.letter_annotations["phred_quality"], expected_quality)
    annotations = alignment.annotations
    self.assertEqual(len(annotations), 6)
    for key, value in expected_annotations.items():
        self.assertEqual(annotations[key], value)
def test_sam1(self):
    """Check header data, alignment count and last alignment of sam1.sam.

    The single target sequence and the first PG metadata entry are
    verified before iterating.  All alignments are then consumed, the
    total is compared against the expected count, and the final alignment
    yielded is verified; it is expected to have neither a target sequence
    nor coordinates.
    """
    # fmt: off
    expected_quality = [
        33, 30, 16, 32, 35, 35, 35, 35, 32, 37,
        37, 37, 35, 36, 38, 37, 36, 38, 39, 36,
        36, 35, 30, 35, 30, 36, 33, 27, 36, 38,
        33, 26, 37, 29, 37, 39, 40, 29, 38, 36,
        33, 39, 36, 37, 31, 31, 27, 33, 37, 29,
        35, 30, 37, 27, 37, 33, 28, 34, 29, 37,
        26, 34, 31, 37, 34, 22, 31, 28, 26, 28,
        36, 28, 22, 28, 30, 31, 26, 26, 26, 23,
        20, 21, 30, 31, 26, 26, 26, 26, 20, 20,
        24, 7, 11, 11, 20, 30, 18, 29, 20, 29,
        31,
    ]
    # fmt: on
    expected_program = {"ID": "bwa", "PN": "bwa", "VN": "0.6.2-r126"}
    alignments = Align.parse("SamBam/sam1.sam", "sam")
    targets = alignments.targets
    self.assertEqual(len(targets), 1)
    self.assertEqual(targets[0].id, "1")
    self.assertEqual(len(targets[0].seq), 239940)
    self.assertEqual(alignments.metadata["PG"][0], expected_program)
    # count stays 0 if the parser yields nothing at all.
    count = 0
    for count, alignment in enumerate(alignments, 1):
        pass
    self.assertEqual(count, 200)
    # From here on, alignment is the last one produced by the parser.
    sequences = alignment.sequences
    self.assertIsNone(sequences[0])
    read = sequences[1]
    self.assertEqual(read.id, "HWI-1KL120:88:D0LRBACXX:1:1101:5516:2195")
    self.assertEqual(
        read.seq,
        "GGCCCAACCGTCCTATATGAGATGTAGCATGGTACAGAACAAACTGCTTACACAGGTCTCACTAGTTAGAAACCTGTGGGCCATGGAGGTCAGACATCCAT",
    )
    self.assertEqual(alignment.flag, 141)
    self.assertEqual(alignment.mapq, 0)
    self.assertIsNone(alignment.coordinates)
    self.assertEqual(read.letter_annotations["phred_quality"], expected_quality)
def test_sam2(self):
    # Parse SamBam/sam2.sam, verify the header information, and walk over
    # all records.  Records 8, 9, 100 and 101 (0-based position in the
    # file) are checked in detail against the table below; every other
    # record must have no target sequence, mapq 0 and no coordinates.
    alignments = Align.parse("SamBam/sam2.sam", "sam")
    targets = alignments.targets
    self.assertEqual(len(targets), 1)
    self.assertEqual(targets[0].id, "1")
    self.assertEqual(len(targets[0].seq), 239940)
    expected_program = {"ID": "bwa", "PN": "bwa", "VN": "0.6.2-r126"}
    self.assertEqual(alignments.metadata["PG"][0], expected_program)
    # fmt: off
    expected = {
        8: {
            "target_range": (132615, 132716),
            "target_seq": "GGTCACACCCTGTCCTCCTCCTACACATACTCGGATGCTTCCTCCTCAACCTTGGCACCCACCTCCTTCTTACTGGGCCCAGGAGCCTTCAAAGCCCAGGA",
            "query_id": "HWI-1KL120:88:D0LRBACXX:1:1101:2205:2204",
            "query_seq": "TCCTGGGCATTGAAGGCTCCTGGGCCCAGTAAGAAGGAGGTGGGTGCCAAGGTTGAGGAGGAAGCATCCGAGTATGTGTAGGAGGAGGACAAGGTGGGACC",
            "flag": 83,
            "mapq": 60,
            "query_range": [101, 0],
            "pnext": 132490,
            "tlen": -226,
            "quality": [
                33, 33, 33, 31, 30, 34, 29, 30, 30, 30,
                34, 33, 33, 35, 35, 35, 35, 35, 35, 35,
                34, 34, 29, 34, 29, 29, 34, 30, 30, 30,
                28, 35, 36, 36, 36, 35, 37, 36, 35, 33,
                38, 38, 39, 40, 40, 36, 36, 35, 28, 39,
                37, 32, 38, 40, 40, 39, 35, 39, 38, 35,
                30, 38, 40, 41, 41, 41, 40, 39, 40, 38,
                37, 35, 39, 37, 40, 41, 41, 41, 40, 41,
                41, 41, 41, 41, 41, 41, 41, 41, 39, 39,
                39, 39, 37, 37, 37, 37, 37, 37, 34, 34,
                34,
            ],
            "nm_xm": 3,
        },
        9: {
            "target_range": (132490, 132591),
            "target_seq": "GCAACAAGGGCTTTGGTGGGAAGGTATTTGCACCTGTCATTCCTTCCTCCTTTACTCCTGCCGCCCCTTGCTGGATCCTGAGCCCCCAGGGTCCCCCGATC",
            "query_id": "HWI-1KL120:88:D0LRBACXX:1:1101:2205:2204",
            "query_seq": "GCAACAAGGGCTTTGGTGGGAAGGTATCTGCACCTGTCATTCCTTCCTCCTTTACTCCTGCCGCCCCTTGCTGGATCCTGAGCCCCCAGGGTCCCCCGATC",
            "flag": 163,
            "mapq": 60,
            "query_range": [0, 101],
            "pnext": 132615,
            "tlen": 226,
            "quality": [
                34, 34, 34, 35, 37, 37, 37, 37, 39, 39,
                39, 39, 39, 41, 41, 39, 36, 39, 40, 41,
                40, 40, 40, 41, 30, 36, 39, 40, 41, 40,
                40, 41, 41, 41, 38, 39, 37, 38, 34, 39,
                38, 41, 41, 40, 40, 40, 41, 41, 41, 41,
                41, 41, 39, 40, 40, 40, 40, 40, 41, 41,
                41, 41, 40, 41, 39, 37, 37, 33, 36, 34,
                36, 36, 35, 35, 33, 35, 35, 35, 35, 35,
                35, 35, 35, 35, 35, 35, 35, 29, 31, 27,
                20, 24, 32, 33, 35, 35, 33, 33, 2, 2,
                2,
            ],
            "nm_xm": 1,
        },
        100: {
            "target_range": (137538, 137639),
            "target_seq": "AAAGTTCGGGGCCTACAAAGGCGGTTGGGAGCTGGGCAGGAGTTGAGCCAAAAGAGCTTGCTTACTTGCTGGGAGGCAGGGCCGGGAGAGCCCGACTTCAG",
            "query_id": "HWI-1KL120:88:D0LRBACXX:1:1101:4673:2125",
            "query_seq": "AAAGTTCGGGGCCTACAAAGGCGGTTGGGAGCTGGGCAGGAGTTGAGCCAAAAGAGCTTGCTTACTTGCTGGGAGGCAGGACCGGGAGAGGCCGACTTCAG",
            "flag": 97,
            "mapq": 37,
            "query_range": [0, 101],
            "pnext": 135649,
            "tlen": -1788,
            "quality": [
                34, 34, 34, 37, 37, 37, 37, 37, 39, 39,
                39, 39, 39, 41, 41, 41, 41, 41, 41, 41,
                41, 41, 41, 41, 38, 39, 38, 40, 41, 40,
                40, 40, 41, 41, 40, 41, 40, 39, 39, 39,
                37, 37, 34, 34, 34, 36, 36, 34, 36, 36,
                35, 35, 33, 35, 35, 35, 34, 35, 35, 35,
                34, 35, 34, 35, 35, 35, 35, 35, 35, 35,
                35, 35, 35, 30, 33, 31, 33, 35, 35, 35,
                35, 35, 35, 35, 35, 35, 35, 35, 33, 35,
                35, 35, 33, 29, 31, 35, 33, 31, 34, 34,
                35,
            ],
            "nm_xm": 2,
        },
        101: {
            "target_range": (135649, 135750),
            "target_seq": "TGGAGAGGCCACCGCGAGGCCTGAGCTGGGCCTGGGGAGCTTGGCTTAGGGAAGTTGTGGGCCTACCAGGGCCGCTGGGAGCTGGGCAGGAGCTGAGTCCA",
            "query_id": "HWI-1KL120:88:D0LRBACXX:1:1101:4673:2125",
            "query_seq": "TGGACTCAGCTCCTGCCCAGCTCCCAGCGGCCCTGGTAGGCCCACAACTTCCCGAAGCCAAGCTCCCCAGGCCCAGCTCAGGCCTCACGGTGGCCTCTCCA",
            "flag": 145,
            "mapq": 37,
            "query_range": [101, 0],
            "pnext": 137538,
            "tlen": 1788,
            "quality": [
                34, 34, 34, 32, 33, 34, 32, 32, 33, 33,
                31, 33, 33, 35, 35, 35, 35, 33, 35, 34,
                35, 34, 35, 35, 33, 35, 35, 35, 35, 35,
                33, 30, 35, 35, 35, 35, 35, 34, 33, 35,
                36, 34, 36, 35, 37, 37, 39, 37, 39, 40,
                40, 41, 41, 40, 41, 41, 41, 40, 41, 41,
                40, 40, 39, 39, 38, 41, 40, 41, 40, 41,
                40, 41, 40, 38, 41, 41, 41, 41, 41, 41,
                40, 41, 40, 40, 41, 41, 41, 41, 39, 39,
                39, 39, 39, 37, 37, 37, 37, 37, 34, 34,
                34,
            ],
            "nm_xm": 2,
        },
    }
    # fmt: on
    index = 0
    for alignment in alignments:
        spec = expected.get(index)
        if spec is None:
            # Records without a table entry must have no target sequence,
            # mapq 0 and no coordinates.
            self.assertIsNone(alignment.sequences[0])
            self.assertEqual(alignment.mapq, 0)
            self.assertIsNone(alignment.coordinates)
        else:
            start, end = spec["target_range"]
            target = alignment.sequences[0]
            self.assertEqual(target.id, "1")
            self.assertEqual(len(target.seq), 239940)
            # Only the aligned stretch of the target is defined.
            self.assertEqual(target.seq.defined_ranges, ((start, end),))
            self.assertEqual(target.seq[start:end], spec["target_seq"])
            query = alignment.sequences[1]
            self.assertEqual(query.id, spec["query_id"])
            self.assertEqual(query.seq, spec["query_seq"])
            self.assertEqual(alignment.flag, spec["flag"])
            self.assertEqual(alignment.mapq, spec["mapq"])
            expected_coordinates = np.array([[start, end], spec["query_range"]])
            self.assertTrue(
                np.array_equal(alignment.coordinates, expected_coordinates)
            )
            self.assertEqual(alignment.rnext, "1")
            self.assertEqual(alignment.pnext, spec["pnext"])
            self.assertEqual(alignment.tlen, spec["tlen"])
            self.assertEqual(
                alignment.sequences[1].letter_annotations["phred_quality"],
                spec["quality"],
            )
            self.assertEqual(len(alignment.annotations), 9)
            # NM and XM carry the same value in each of the four records.
            expected_tags = {
                "XT": "U",
                "NM": spec["nm_xm"],
                "SM": 37,
                "AM": 37,
                "X0": 1,
                "X1": 0,
                "XM": spec["nm_xm"],
                "XO": 0,
                "XG": 0,
            }
            for tag, value in expected_tags.items():
                self.assertEqual(alignment.annotations[tag], value)
        index += 1
    self.assertEqual(index, 200)
class TestAlign_clipping(unittest.TestCase):
def test_6M(self):
    """Test alignment starting at non-zero position.

    Builds the alignment by hand, checks its printed form, its SAM line
    and its counts, then parses the SAM line back and compares the
    coordinates with the ones used to build the alignment.
    """
    coordinates = np.array([[8, 14], [0, 6]])
    records = [
        SeqRecord(Seq("AAAAAAAACCCCCC"), id="target"),
        SeqRecord(Seq("CCCCCC"), id="query"),
    ]
    alignment = Alignment(records, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 8 CCCCCC 14
0 |||||| 6
query 0 CCCCCC 6
""",
    )
    sam_line = alignment.format("sam")
    self.assertEqual(sam_line, "query\t0\ttarget\t9\t255\t6M\t*\t0\t0\tCCCCCC\t*\n")
    columns = sam_line.split()
    # The fourth column minus one must equal the target start coordinate.
    self.assertEqual(int(columns[3]) - 1, 8)
    self.assertEqual(columns[5], "6M")
    stream = StringIO(sam_line)
    alignments = Align.parse(stream, "sam")
    expected_letters = np.array([list("CCCCCC"), list("CCCCCC")], dtype="U")
    self.assertTrue(np.array_equal(np.array(alignment, "U"), expected_letters))
    counts = alignment.counts()
    expected_repr = (
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 0 gaps) at 0x%x>"
        % id(counts)
    )
    self.assertEqual(repr(counts), expected_repr)
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    expected_counts = (
        ("left_insertions", 0),
        ("left_deletions", 0),
        ("right_insertions", 0),
        ("right_deletions", 0),
        ("internal_insertions", 0),
        ("internal_deletions", 0),
        ("left_gaps", 0),
        ("right_gaps", 0),
        ("internal_gaps", 0),
        ("insertions", 0),
        ("deletions", 0),
        ("gaps", 0),
        ("aligned", 6),
        ("identities", 6),
        ("mismatches", 0),
    )
    for name, value in expected_counts:
        self.assertEqual(getattr(counts, name), value)
    # Round trip: the record parsed from the SAM line has the same coordinates.
    parsed = next(alignments)
    stream.close()
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
def test_8D6M_ex1(self):
    """Test alignment starting with deletion.

    Builds the alignment by hand, checks its printed form, its SAM line
    and its counts, then parses the SAM line back and compares the
    coordinates with the ones used to build the alignment.
    """
    coordinates = np.array([[0, 8, 14], [0, 0, 6]])
    records = [
        SeqRecord(Seq("AAAAAAAACCCCCC"), id="target"),
        SeqRecord(Seq("CCCCCC"), id="query"),
    ]
    alignment = Alignment(records, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 0 AAAAAAAACCCCCC 14
0 --------|||||| 14
query 0 --------CCCCCC 6
""",
    )
    sam_line = alignment.format("sam")
    self.assertEqual(
        sam_line, "query\t0\ttarget\t1\t255\t8D6M\t*\t0\t0\tCCCCCC\t*\n"
    )
    columns = sam_line.split()
    # The fourth column minus one must equal the target start coordinate.
    self.assertEqual(int(columns[3]) - 1, 0)
    self.assertEqual(columns[5], "8D6M")
    stream = StringIO(sam_line)
    alignments = Align.parse(stream, "sam")
    expected_letters = np.array(
        [list("AAAAAAAACCCCCC"), list("--------CCCCCC")], dtype="U"
    )
    self.assertTrue(np.array_equal(np.array(alignment, "U"), expected_letters))
    counts = alignment.counts()
    expected_repr = (
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 8 gaps) at 0x%x>"
        % id(counts)
    )
    self.assertEqual(repr(counts), expected_repr)
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 8:
left_gaps = 8:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 8:
open_left_deletions = 1,
extend_left_deletions = 7;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    expected_counts = (
        ("left_insertions", 0),
        ("left_deletions", 8),
        ("right_insertions", 0),
        ("right_deletions", 0),
        ("internal_insertions", 0),
        ("internal_deletions", 0),
        ("left_gaps", 8),
        ("right_gaps", 0),
        ("internal_gaps", 0),
        ("insertions", 0),
        ("deletions", 8),
        ("gaps", 8),
        ("aligned", 6),
        ("identities", 6),
        ("mismatches", 0),
    )
    for name, value in expected_counts:
        self.assertEqual(getattr(counts, name), value)
    # Round trip: the record parsed from the SAM line has the same coordinates.
    parsed = next(alignments)
    stream.close()
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
def test_8D6M_ex2(self):
    """Test alignment starting with deletion at non-zero position.

    Builds the alignment by hand, checks its printed form, its SAM line
    and its counts, then parses the SAM line back and compares the
    coordinates with the ones used to build the alignment.
    """
    coordinates = np.array([[4, 12, 18], [0, 0, 6]])
    records = [
        SeqRecord(Seq("GGGGAAAAAAAACCCCCC"), id="target"),
        SeqRecord(Seq("CCCCCC"), id="query"),
    ]
    alignment = Alignment(records, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 4 AAAAAAAACCCCCC 18
0 --------|||||| 14
query 0 --------CCCCCC 6
""",
    )
    sam_line = alignment.format("sam")
    self.assertEqual(
        sam_line, "query\t0\ttarget\t5\t255\t8D6M\t*\t0\t0\tCCCCCC\t*\n"
    )
    columns = sam_line.split()
    # The fourth column minus one must equal the target start coordinate.
    self.assertEqual(int(columns[3]) - 1, 4)
    self.assertEqual(columns[5], "8D6M")
    stream = StringIO(sam_line)
    alignments = Align.parse(stream, "sam")
    expected_letters = np.array(
        [list("AAAAAAAACCCCCC"), list("--------CCCCCC")], dtype="U"
    )
    self.assertTrue(np.array_equal(np.array(alignment, "U"), expected_letters))
    counts = alignment.counts()
    expected_repr = (
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 8 gaps) at 0x%x>"
        % id(counts)
    )
    self.assertEqual(repr(counts), expected_repr)
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 8:
left_gaps = 8:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 8:
open_left_deletions = 1,
extend_left_deletions = 7;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    expected_counts = (
        ("left_insertions", 0),
        ("left_deletions", 8),
        ("right_insertions", 0),
        ("right_deletions", 0),
        ("internal_insertions", 0),
        ("internal_deletions", 0),
        ("left_gaps", 8),
        ("right_gaps", 0),
        ("internal_gaps", 0),
        ("insertions", 0),
        ("deletions", 8),
        ("gaps", 8),
        ("aligned", 6),
        ("identities", 6),
        ("mismatches", 0),
    )
    for name, value in expected_counts:
        self.assertEqual(getattr(counts, name), value)
    # Round trip: the record parsed from the SAM line has the same coordinates.
    parsed = next(alignments)
    stream.close()
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
def test_8I6M_ex1(self):
    """Test alignment starting with insertion.

    Builds the alignment by hand, checks its printed form, its SAM line
    and its counts, then parses the SAM line back and compares the
    coordinates with the ones used to build the alignment.
    """
    coordinates = np.array([[0, 0, 6], [0, 8, 14]])
    records = [
        SeqRecord(Seq("CCCCCC"), id="target"),
        SeqRecord(Seq("AAAAAAAACCCCCC"), id="query"),
    ]
    alignment = Alignment(records, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 0 --------CCCCCC 6
0 --------|||||| 14
query 0 AAAAAAAACCCCCC 14
""",
    )
    sam_line = alignment.format("sam")
    self.assertEqual(
        sam_line, "query\t0\ttarget\t1\t255\t8I6M\t*\t0\t0\tAAAAAAAACCCCCC\t*\n"
    )
    columns = sam_line.split()
    # The fourth column minus one must equal the target start coordinate.
    self.assertEqual(int(columns[3]) - 1, 0)
    self.assertEqual(columns[5], "8I6M")
    stream = StringIO(sam_line)
    alignments = Align.parse(stream, "sam")
    expected_letters = np.array(
        [list("--------CCCCCC"), list("AAAAAAAACCCCCC")], dtype="U"
    )
    self.assertTrue(np.array_equal(np.array(alignment, "U"), expected_letters))
    counts = alignment.counts()
    expected_repr = (
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 8 gaps) at 0x%x>"
        % id(counts)
    )
    self.assertEqual(repr(counts), expected_repr)
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 8:
left_gaps = 8:
left_insertions = 8:
open_left_insertions = 1,
extend_left_insertions = 7;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    expected_counts = (
        ("left_insertions", 8),
        ("left_deletions", 0),
        ("right_insertions", 0),
        ("right_deletions", 0),
        ("internal_insertions", 0),
        ("internal_deletions", 0),
        ("left_gaps", 8),
        ("right_gaps", 0),
        ("internal_gaps", 0),
        ("insertions", 8),
        ("deletions", 0),
        ("gaps", 8),
        ("aligned", 6),
        ("identities", 6),
        ("mismatches", 0),
    )
    for name, value in expected_counts:
        self.assertEqual(getattr(counts, name), value)
    # Round trip: the record parsed from the SAM line has the same coordinates.
    parsed = next(alignments)
    stream.close()
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
def test_8I6M_ex2(self):
    """Test alignment starting with insertion at non-zero position.

    Builds the alignment by hand, checks its printed form, its SAM line
    and its counts, then parses the SAM line back and compares the
    coordinates with the ones used to build the alignment.
    """
    coordinates = np.array([[4, 4, 10], [0, 8, 14]])
    records = [
        SeqRecord(Seq("GGGGCCCCCC"), id="target"),
        SeqRecord(Seq("AAAAAAAACCCCCC"), id="query"),
    ]
    alignment = Alignment(records, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 4 --------CCCCCC 10
0 --------|||||| 14
query 0 AAAAAAAACCCCCC 14
""",
    )
    sam_line = alignment.format("sam")
    self.assertEqual(
        sam_line, "query\t0\ttarget\t5\t255\t8I6M\t*\t0\t0\tAAAAAAAACCCCCC\t*\n"
    )
    columns = sam_line.split()
    # The fourth column minus one must equal the target start coordinate.
    self.assertEqual(int(columns[3]) - 1, 4)
    self.assertEqual(columns[5], "8I6M")
    stream = StringIO(sam_line)
    alignments = Align.parse(stream, "sam")
    expected_letters = np.array(
        [list("--------CCCCCC"), list("AAAAAAAACCCCCC")], dtype="U"
    )
    self.assertTrue(np.array_equal(np.array(alignment, "U"), expected_letters))
    counts = alignment.counts()
    expected_repr = (
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 8 gaps) at 0x%x>"
        % id(counts)
    )
    self.assertEqual(repr(counts), expected_repr)
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 8:
left_gaps = 8:
left_insertions = 8:
open_left_insertions = 1,
extend_left_insertions = 7;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    expected_counts = (
        ("left_insertions", 8),
        ("left_deletions", 0),
        ("right_insertions", 0),
        ("right_deletions", 0),
        ("internal_insertions", 0),
        ("internal_deletions", 0),
        ("left_gaps", 8),
        ("right_gaps", 0),
        ("internal_gaps", 0),
        ("insertions", 8),
        ("deletions", 0),
        ("gaps", 8),
        ("aligned", 6),
        ("identities", 6),
        ("mismatches", 0),
    )
    for name, value in expected_counts:
        self.assertEqual(getattr(counts, name), value)
    # Round trip: the record parsed from the SAM line has the same coordinates.
    parsed = next(alignments)
    stream.close()
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
def test_8S6M(self):
    """Test alignment starting with soft clip.

    Builds the alignment by hand, checks its printed form, its SAM line
    and its counts, then parses the SAM line back and compares the
    coordinates with the ones used to build the alignment.
    """
    coordinates = np.array([[0, 6], [8, 14]])
    records = [
        SeqRecord(Seq("CCCCCC"), id="target"),
        SeqRecord(Seq("AAAAAAAACCCCCC"), id="query"),
    ]
    alignment = Alignment(records, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 0 CCCCCC 6
0 |||||| 6
query 8 CCCCCC 14
""",
    )
    sam_line = alignment.format("sam")
    self.assertEqual(
        sam_line, "query\t0\ttarget\t1\t255\t8S6M\t*\t0\t0\tAAAAAAAACCCCCC\t*\n"
    )
    columns = sam_line.split()
    # The fourth column minus one must equal the target start coordinate.
    self.assertEqual(int(columns[3]) - 1, 0)
    self.assertEqual(columns[5], "8S6M")
    stream = StringIO(sam_line)
    alignments = Align.parse(stream, "sam")
    expected_letters = np.array([list("CCCCCC"), list("CCCCCC")], dtype="U")
    self.assertTrue(np.array_equal(np.array(alignment, "U"), expected_letters))
    counts = alignment.counts()
    expected_repr = (
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 0 gaps) at 0x%x>"
        % id(counts)
    )
    self.assertEqual(repr(counts), expected_repr)
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    expected_counts = (
        ("left_insertions", 0),
        ("left_deletions", 0),
        ("right_insertions", 0),
        ("right_deletions", 0),
        ("internal_insertions", 0),
        ("internal_deletions", 0),
        ("left_gaps", 0),
        ("right_gaps", 0),
        ("internal_gaps", 0),
        ("insertions", 0),
        ("deletions", 0),
        ("gaps", 0),
        ("aligned", 6),
        ("identities", 6),
        ("mismatches", 0),
    )
    for name, value in expected_counts:
        self.assertEqual(getattr(counts, name), value)
    # Round trip: the record parsed from the SAM line has the same coordinates.
    parsed = next(alignments)
    stream.close()
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
def test_4S8D6M(self):
    """Test alignment starting with soft clip followed by deletion.

    Builds the alignment by hand, checks its printed form, its SAM line
    and its counts, then parses the SAM line back and compares the
    coordinates with the ones used to build the alignment.
    """
    coordinates = np.array([[0, 8, 14], [4, 4, 10]])
    records = [
        SeqRecord(Seq("AAAAAAAACCCCCC"), id="target"),
        SeqRecord(Seq("GGGGCCCCCC"), id="query"),
    ]
    alignment = Alignment(records, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 0 AAAAAAAACCCCCC 14
0 --------|||||| 14
query 4 --------CCCCCC 10
""",
    )
    sam_line = alignment.format("sam")
    self.assertEqual(
        sam_line, "query\t0\ttarget\t1\t255\t4S8D6M\t*\t0\t0\tGGGGCCCCCC\t*\n"
    )
    columns = sam_line.split()
    # The fourth column minus one must equal the target start coordinate.
    self.assertEqual(int(columns[3]) - 1, 0)
    self.assertEqual(columns[5], "4S8D6M")
    stream = StringIO(sam_line)
    alignments = Align.parse(stream, "sam")
    expected_letters = np.array(
        [list("AAAAAAAACCCCCC"), list("--------CCCCCC")], dtype="U"
    )
    self.assertTrue(np.array_equal(np.array(alignment, "U"), expected_letters))
    counts = alignment.counts()
    expected_repr = (
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 8 gaps) at 0x%x>"
        % id(counts)
    )
    self.assertEqual(repr(counts), expected_repr)
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 8:
left_gaps = 8:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 8:
open_left_deletions = 1,
extend_left_deletions = 7;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    expected_counts = (
        ("left_insertions", 0),
        ("left_deletions", 8),
        ("right_insertions", 0),
        ("right_deletions", 0),
        ("internal_insertions", 0),
        ("internal_deletions", 0),
        ("left_gaps", 8),
        ("right_gaps", 0),
        ("internal_gaps", 0),
        ("insertions", 0),
        ("deletions", 8),
        ("gaps", 8),
        ("aligned", 6),
        ("identities", 6),
        ("mismatches", 0),
    )
    for name, value in expected_counts:
        self.assertEqual(getattr(counts, name), value)
    # Round trip: the record parsed from the SAM line has the same coordinates.
    parsed = next(alignments)
    stream.close()
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
def test_4I8D6M(self):
    """Test alignment starting with insertion followed by deletion.

    Builds the alignment by hand, checks its printed form, its SAM line
    and its counts, then parses the SAM line back and compares the
    coordinates with the ones used to build the alignment.
    """
    coordinates = np.array([[0, 0, 8, 14], [0, 4, 4, 10]])
    records = [
        SeqRecord(Seq("AAAAAAAACCCCCC"), id="target"),
        SeqRecord(Seq("GGGGCCCCCC"), id="query"),
    ]
    alignment = Alignment(records, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 0 ----AAAAAAAACCCCCC 14
0 ------------|||||| 18
query 0 GGGG--------CCCCCC 10
""",
    )
    sam_line = alignment.format("sam")
    self.assertEqual(
        sam_line, "query\t0\ttarget\t1\t255\t4I8D6M\t*\t0\t0\tGGGGCCCCCC\t*\n"
    )
    columns = sam_line.split()
    # The fourth column minus one must equal the target start coordinate.
    self.assertEqual(int(columns[3]) - 1, 0)
    self.assertEqual(columns[5], "4I8D6M")
    stream = StringIO(sam_line)
    alignments = Align.parse(stream, "sam")
    expected_letters = np.array(
        [list("----AAAAAAAACCCCCC"), list("GGGG--------CCCCCC")], dtype="U"
    )
    self.assertTrue(np.array_equal(np.array(alignment, "U"), expected_letters))
    counts = alignment.counts()
    expected_repr = (
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 12 gaps) at 0x%x>"
        % id(counts)
    )
    self.assertEqual(repr(counts), expected_repr)
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 12:
left_gaps = 4:
left_insertions = 4:
open_left_insertions = 1,
extend_left_insertions = 3;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 8:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 8:
open_internal_deletions = 1,
extend_internal_deletions = 7;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    expected_counts = (
        ("left_insertions", 4),
        ("left_deletions", 0),
        ("right_insertions", 0),
        ("right_deletions", 0),
        ("internal_insertions", 0),
        ("internal_deletions", 8),
        ("left_gaps", 4),
        ("right_gaps", 0),
        ("internal_gaps", 8),
        ("insertions", 4),
        ("deletions", 8),
        ("gaps", 12),
        ("aligned", 6),
        ("identities", 6),
        ("mismatches", 0),
    )
    for name, value in expected_counts:
        self.assertEqual(getattr(counts, name), value)
    # Round trip: the record parsed from the SAM line has the same coordinates.
    parsed = next(alignments)
    stream.close()
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
def test_4S6M(self):
    """Test alignment starting with soft clip at non-zero position.

    Builds the alignment by hand, checks its printed form, its SAM line
    and its counts, then parses the SAM line back and compares the
    coordinates with the ones used to build the alignment.
    """
    coordinates = np.array([[8, 14], [4, 10]])
    records = [
        SeqRecord(Seq("AAAAAAAACCCCCC"), id="target"),
        SeqRecord(Seq("GGGGCCCCCC"), id="query"),
    ]
    alignment = Alignment(records, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 8 CCCCCC 14
0 |||||| 6
query 4 CCCCCC 10
""",
    )
    sam_line = alignment.format("sam")
    self.assertEqual(
        sam_line, "query\t0\ttarget\t9\t255\t4S6M\t*\t0\t0\tGGGGCCCCCC\t*\n"
    )
    columns = sam_line.split()
    # The fourth column minus one must equal the target start coordinate.
    self.assertEqual(int(columns[3]) - 1, 8)
    self.assertEqual(columns[5], "4S6M")
    stream = StringIO(sam_line)
    alignments = Align.parse(stream, "sam")
    expected_letters = np.array([list("CCCCCC"), list("CCCCCC")], dtype="U")
    self.assertTrue(np.array_equal(np.array(alignment, "U"), expected_letters))
    counts = alignment.counts()
    expected_repr = (
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 0 gaps) at 0x%x>"
        % id(counts)
    )
    self.assertEqual(repr(counts), expected_repr)
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    expected_counts = (
        ("left_insertions", 0),
        ("left_deletions", 0),
        ("right_insertions", 0),
        ("right_deletions", 0),
        ("internal_insertions", 0),
        ("internal_deletions", 0),
        ("left_gaps", 0),
        ("right_gaps", 0),
        ("internal_gaps", 0),
        ("insertions", 0),
        ("deletions", 0),
        ("gaps", 0),
        ("aligned", 6),
        ("identities", 6),
        ("mismatches", 0),
    )
    for name, value in expected_counts:
        self.assertEqual(getattr(counts, name), value)
    # Round trip: the record parsed from the SAM line has the same coordinates.
    parsed = next(alignments)
    stream.close()
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
def test_4D8I6M(self):
    """Test alignment starting with deletion followed by insertion.

    The target's first four letters are unaligned (a deletion), the
    query's first eight letters are unaligned (an insertion), and the
    final six letters of both sequences are aligned.  The test checks the
    printed alignment, the SAM record, the gap/identity counts, and that
    parsing the SAM record gives back the same coordinates.
    """
    records = [
        SeqRecord(Seq("GGGGCCCCCC"), id="target"),
        SeqRecord(Seq("AAAAAAAACCCCCC"), id="query"),
    ]
    coordinates = np.array([[0, 4, 4, 10], [0, 0, 8, 14]])
    alignment = Alignment(records, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 0 GGGG--------CCCCCC 10
0 ------------|||||| 18
query 0 ----AAAAAAAACCCCCC 14
""",
    )

    # SAM record: POS is 1-based in the file, so subtract one before
    # comparing it with the 0-based start.
    sam_line = alignment.format("sam")
    self.assertEqual(
        sam_line, "query\t0\ttarget\t1\t255\t4D8I6M\t*\t0\t0\tAAAAAAAACCCCCC\t*\n"
    )
    columns = sam_line.split()
    self.assertEqual(int(columns[3]) - 1, 0)
    self.assertEqual(columns[5], "4D8I6M")
    stream = StringIO(sam_line)
    parsed = Align.parse(stream, "sam")

    # Letter-by-letter view of the alignment, one row per sequence.
    expected_letters = np.array(
        [list("GGGG--------CCCCCC"), list("----AAAAAAAACCCCCC")], dtype="U"
    )
    self.assertTrue(np.array_equal(np.array(alignment, "U"), expected_letters))

    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 12 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 12:
left_gaps = 4:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 4:
open_left_deletions = 1,
extend_left_deletions = 3;
internal_gaps = 8:
internal_insertions = 8:
open_internal_insertions = 1,
extend_internal_insertions = 7;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    # Individual attributes, checked in a fixed order.
    expected_counts = (
        ("left_insertions", 0),
        ("left_deletions", 4),
        ("right_insertions", 0),
        ("right_deletions", 0),
        ("internal_insertions", 8),
        ("internal_deletions", 0),
        ("left_gaps", 4),
        ("right_gaps", 0),
        ("internal_gaps", 8),
        ("insertions", 8),
        ("deletions", 4),
        ("gaps", 12),
        ("aligned", 6),
        ("identities", 6),
        ("mismatches", 0),
    )
    for attribute, expected in expected_counts:
        self.assertEqual(getattr(counts, attribute), expected)

    # Round trip: the alignment read back from the SAM record must have
    # the coordinates we started from.
    alignment = next(parsed)
    stream.close()
    self.assertTrue(np.array_equal(alignment.coordinates, coordinates))
def test_4S8I6M(self):
    """Test alignment starting with soft clip followed by insertion.

    The query coordinates start at 4, so its first four letters fall
    outside the alignment (soft-clipped in the SAM record).  The next
    eight query letters are an insertion, and the final six letters are
    aligned to the whole target.  The test checks the printed alignment,
    the SAM record, the gap/identity counts, and that parsing the SAM
    record gives back the same coordinates.
    """
    records = [
        SeqRecord(Seq("CCCCCC"), id="target"),
        SeqRecord(Seq("GGGGAAAAAAAACCCCCC"), id="query"),
    ]
    coordinates = np.array([[0, 0, 6], [4, 12, 18]])
    alignment = Alignment(records, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 0 --------CCCCCC 6
0 --------|||||| 14
query 4 AAAAAAAACCCCCC 18
""",
    )

    # SAM record: POS is 1-based in the file, so subtract one before
    # comparing it with the 0-based start.
    sam_line = alignment.format("sam")
    self.assertEqual(
        sam_line, "query\t0\ttarget\t1\t255\t4S8I6M\t*\t0\t0\tGGGGAAAAAAAACCCCCC\t*\n"
    )
    columns = sam_line.split()
    self.assertEqual(int(columns[3]) - 1, 0)
    self.assertEqual(columns[5], "4S8I6M")
    stream = StringIO(sam_line)
    parsed = Align.parse(stream, "sam")

    # Letter-by-letter view of the alignment, one row per sequence; the
    # soft-clipped query letters do not appear.
    expected_letters = np.array(
        [list("--------CCCCCC"), list("AAAAAAAACCCCCC")], dtype="U"
    )
    self.assertTrue(np.array_equal(np.array(alignment, "U"), expected_letters))

    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 8 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 8:
left_gaps = 8:
left_insertions = 8:
open_left_insertions = 1,
extend_left_insertions = 7;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    # Individual attributes, checked in a fixed order.
    expected_counts = (
        ("left_insertions", 8),
        ("left_deletions", 0),
        ("right_insertions", 0),
        ("right_deletions", 0),
        ("internal_insertions", 0),
        ("internal_deletions", 0),
        ("left_gaps", 8),
        ("right_gaps", 0),
        ("internal_gaps", 0),
        ("insertions", 8),
        ("deletions", 0),
        ("gaps", 8),
        ("aligned", 6),
        ("identities", 6),
        ("mismatches", 0),
    )
    for attribute, expected in expected_counts:
        self.assertEqual(getattr(counts, attribute), expected)

    # Round trip: the alignment read back from the SAM record must have
    # the coordinates we started from.
    alignment = next(parsed)
    stream.close()
    self.assertTrue(np.array_equal(alignment.coordinates, coordinates))
class TestAlign_strand(unittest.TestCase):
    """SAM formatting of alignments whose coordinates run in reverse."""

    def test_format(self):
        """Test alignment with the target on the opposite strand.

        The same alignment is written once with decreasing query
        coordinates and once with decreasing target coordinates (the
        coordinate columns reversed); both must give the same SAM record.
        The check is then repeated on the reverse complement of the
        alignment.
        """

        def verify(aln, expected_text, expected_sam):
            # Printed form first, then the SAM record.
            self.assertEqual(str(aln), expected_text)
            self.assertEqual(format(aln, "sam"), expected_sam)

        aligned = Alignment(
            ["AACAGCAGCGTGTCG", "CAGCTAGCGAA"],
            np.array(
                [
                    [0, 2, 2, 3, 4, 6, 6, 9, 10, 12, 15],
                    [11, 11, 9, 8, 8, 6, 5, 2, 2, 0, 0],
                ]
            ),
        )
        aligned.score = 8
        scored_sam = """\
query 16 target 1 255 2D2I1M1D2M1I3M1D2M3D * 0 0 TTCGCTAGCTG * AS:i:8
"""
        verify(
            aligned,
            """\
target 0 AA--CAGC-AGCGTGTCG 15
0 ----|-||-|||-||--- 18
query 11 --TTC-GCTAGC-TG--- 0
""",
            scored_sam,
        )

        # Reverse the column order: the printed alignment changes, the SAM
        # record is expected to stay the same.
        aligned.coordinates = aligned.coordinates[:, ::-1]
        verify(
            aligned,
            """\
target 15 CGACACGCT-GCTG--TT 0
0 ---||-|||-||-|---- 18
query 0 ---CA-GCTAGC-GAA-- 11
""",
            scored_sam,
        )

        # Restore the original column order before taking the reverse
        # complement.  The expected record for the reverse complement has
        # no AS tag.
        aligned.coordinates = aligned.coordinates[:, ::-1]
        flipped = aligned.reverse_complement()
        unscored_sam = """\
query 16 target 1 255 3D2M1D3M1I2M1D1M2I2D * 0 0 CAGCTAGCGAA *
"""
        verify(
            flipped,
            """\
target 0 CGACACGCT-GCTG--TT 15
0 ---||-|||-||-|---- 18
query 11 ---CA-GCTAGC-GAA-- 0
""",
            unscored_sam,
        )

        # Same column reversal applied to the reverse-complemented alignment.
        flipped.coordinates = flipped.coordinates[:, ::-1]
        verify(
            flipped,
            """\
target 15 AA--CAGC-AGCGTGTCG 0
0 ----|-||-|||-||--- 18
query 0 --TTC-GCTAGC-TG--- 11
""",
            unscored_sam,
        )
# Run the tests in this module with per-test output when executed as a script.
if __name__ == "__main__":
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))