mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
* update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * updat * update * update * update * update * update * update * update * update * update * update * update * update * halfway finished * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * pdate * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * replace precompiler #defines by inline functions * update * update * add tests * update * update * update * update * update * update * update * update * documentation * update * avoid stpcpy * pointer printing * compiler warning * testing without codonalign * testing without codonalign and pairwisealigner * compiler warning * adding codonalign and pairwisealigner back in * remove inline from check_indices * add inline to check_indices * update * add test line 12287 test_pairwise_aligner.py * update * update * update * update * change dtype from int32 to "i" * all done * testing * testing * testing * testing * done * done * fix how pointers are printed on Windows * update * update * update * update * fix id printing on pypy * style change only * Use Py_uintptr_t instead of uintptr_t * fix memory leak * remove double 
semicolon * check if GitHub actions are now pickup up Python version 3.13.5 without hardcoding it --------- Co-authored-by: Michiel Jan Laurens de Hoon <mdehoon@Michiels-MacBook-Air.local> Co-authored-by: Michiel de Hoon <mdehoon@tkx288.genome.gsc.riken.jp> Co-authored-by: Michiel de Hoon <michiel.dehoon@riken.jp>
9632 lines
383 KiB
Python
9632 lines
383 KiB
Python
# Copyright 2022 by Michiel de Hoon. All rights reserved.
|
|
# This code is part of the Biopython distribution and governed by its
|
|
# license. Please see the LICENSE file that should have been included
|
|
# as part of this package.
|
|
"""Tests for Align.sam module."""
|
|
import unittest
|
|
from io import StringIO
|
|
|
|
from Bio import Align
|
|
from Bio import SeqIO
|
|
from Bio.Align import Alignment
|
|
from Bio.Seq import Seq
|
|
from Bio.SeqRecord import SeqRecord
|
|
|
|
try:
|
|
import numpy as np
|
|
except ImportError:
|
|
from Bio import MissingPythonDependencyError
|
|
|
|
raise MissingPythonDependencyError(
|
|
"Install numpy if you want to use Bio.Align.sam."
|
|
) from None
|
|
|
|
|
|
class TestAlign_dna_rna(unittest.TestCase):
|
|
# The SAM file dna_rna.sam was generated using these commands:
|
|
# twoBitToFa hg38.2bit stdout | samtools dict -a hg38 -s "Homo sapiens" | grep -v chrUn | grep -v alt | grep -v random > dna_rna.sam
|
|
# psl2sam.pl dna_rna.psl >> dna_rna.sam
|
|
# The CIGAR string was then edited to replace D by N for introns and H by S
|
|
# where appropriate.
|
|
# The alignment scores (AS tag) were copied from the BED file dna_rna.bed.
|
|
|
|
def setUp(self):
    """Load the DNA and RNA reference sequences used to check alignments.

    Reads Blat/dna.fa, whose record IDs are expected to look like
    ``chr3:<start>-<end>``, and stores each upper-cased fragment under its
    start coordinate. The fragments are combined into ``self.dna``, a Seq
    created from that dictionary with length 198295559.

    Reads Blat/rna.fa into ``self.rna``, a dictionary mapping record IDs
    to their sequences, and trims the last 12 nucleotides from the two
    NR_111921.1 entries.
    """
    data = {}
    for record in SeqIO.parse("Blat/dna.fa", "fasta"):
        name, start_end = record.id.split(":")
        # Use unittest assertions instead of bare ``assert`` statements:
        # the latter are stripped when Python runs with -O, which would
        # silently skip these fixture sanity checks.
        self.assertEqual(name, "chr3")
        start, end = start_end.split("-")
        start = int(start)
        end = int(end)
        sequence = str(record.seq).upper()
        self.assertEqual(len(sequence), end - start)
        data[start] = sequence
    self.dna = Seq(data, length=198295559)
    records = SeqIO.parse("Blat/rna.fa", "fasta")
    self.rna = {record.id: record.seq for record in records}
    # Last 12 nucleotides were clipped by Blat as the poly(A) tail
    self.rna["NR_111921.1"] = self.rna["NR_111921.1"][:-12]
    self.rna["NR_111921.1_modified"] = self.rna["NR_111921.1_modified"][:-12]
|
|
|
|
def check_alignments(self, alignments):
|
|
"""Check the alignments."""
|
|
self.assertEqual(list(alignments.metadata), ["HD"])
|
|
self.assertEqual(alignments.metadata["HD"], {"VN": "1.0", "SO": "unsorted"})
|
|
self.assertEqual(len(alignments.targets), 25)
|
|
self.assertEqual(alignments.targets[0].id, "chr1")
|
|
self.assertEqual(len(alignments.targets[0]), 248956422)
|
|
self.assertEqual(
|
|
alignments.targets[0].annotations,
|
|
{
|
|
"MD5": "2648ae1bacce4ec4b6cf337dcae37816",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[1].id, "chr10")
|
|
self.assertEqual(len(alignments.targets[1]), 133797422)
|
|
self.assertEqual(
|
|
alignments.targets[1].annotations,
|
|
{
|
|
"MD5": "907112d17fcb73bcab1ed1c72b97ce68",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[2].id, "chr11")
|
|
self.assertEqual(len(alignments.targets[2]), 135086622)
|
|
self.assertEqual(
|
|
alignments.targets[2].annotations,
|
|
{
|
|
"MD5": "1511375dc2dd1b633af8cf439ae90cec",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[3].id, "chr12")
|
|
self.assertEqual(len(alignments.targets[3]), 133275309)
|
|
self.assertEqual(
|
|
alignments.targets[3].annotations,
|
|
{
|
|
"MD5": "e81e16d3f44337034695a29b97708fce",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[4].id, "chr13")
|
|
self.assertEqual(len(alignments.targets[4]), 114364328)
|
|
self.assertEqual(
|
|
alignments.targets[4].annotations,
|
|
{
|
|
"MD5": "17dab79b963ccd8e7377cef59a54fe1c",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[5].id, "chr14")
|
|
self.assertEqual(len(alignments.targets[5]), 107043718)
|
|
self.assertEqual(
|
|
alignments.targets[5].annotations,
|
|
{
|
|
"MD5": "acbd9552c059d9b403e75ed26c1ce5bc",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[6].id, "chr15")
|
|
self.assertEqual(len(alignments.targets[6]), 101991189)
|
|
self.assertEqual(
|
|
alignments.targets[6].annotations,
|
|
{
|
|
"MD5": "f036bd11158407596ca6bf3581454706",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[7].id, "chr16")
|
|
self.assertEqual(len(alignments.targets[7]), 90338345)
|
|
self.assertEqual(
|
|
alignments.targets[7].annotations,
|
|
{
|
|
"MD5": "24e7cabfba3548a2bb4dff582b9ee870",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[8].id, "chr17")
|
|
self.assertEqual(len(alignments.targets[8]), 83257441)
|
|
self.assertEqual(
|
|
alignments.targets[8].annotations,
|
|
{
|
|
"MD5": "a8499ca51d6fb77332c2d242923994eb",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[9].id, "chr18")
|
|
self.assertEqual(len(alignments.targets[9]), 80373285)
|
|
self.assertEqual(
|
|
alignments.targets[9].annotations,
|
|
{
|
|
"MD5": "11eeaa801f6b0e2e36a1138616b8ee9a",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[10].id, "chr19")
|
|
self.assertEqual(len(alignments.targets[10]), 58617616)
|
|
self.assertEqual(
|
|
alignments.targets[10].annotations,
|
|
{
|
|
"MD5": "b0eba2c7bb5c953d1e06a508b5e487de",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[11].id, "chr2")
|
|
self.assertEqual(len(alignments.targets[11]), 242193529)
|
|
self.assertEqual(
|
|
alignments.targets[11].annotations,
|
|
{
|
|
"MD5": "4bb4f82880a14111eb7327169ffb729b",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[12].id, "chr20")
|
|
self.assertEqual(len(alignments.targets[12]), 64444167)
|
|
self.assertEqual(
|
|
alignments.targets[12].annotations,
|
|
{
|
|
"MD5": "b18e6c531b0bd70e949a7fc20859cb01",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[13].id, "chr21")
|
|
self.assertEqual(len(alignments.targets[13]), 46709983)
|
|
self.assertEqual(
|
|
alignments.targets[13].annotations,
|
|
{
|
|
"MD5": "2f45a3455007b7e271509161e52954a9",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[14].id, "chr22")
|
|
self.assertEqual(len(alignments.targets[14]), 50818468)
|
|
self.assertEqual(
|
|
alignments.targets[14].annotations,
|
|
{
|
|
"MD5": "221733a2a15e2de66d33e73d126c5109",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[15].id, "chr3")
|
|
self.assertEqual(len(alignments.targets[15]), 198295559)
|
|
self.assertEqual(
|
|
alignments.targets[15].annotations,
|
|
{
|
|
"MD5": "a48af509898d3736ba95dc0912c0b461",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[16].id, "chr4")
|
|
self.assertEqual(len(alignments.targets[16]), 190214555)
|
|
self.assertEqual(
|
|
alignments.targets[16].annotations,
|
|
{
|
|
"MD5": "3210fecf1eb92d5489da4346b3fddc6e",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[17].id, "chr5")
|
|
self.assertEqual(len(alignments.targets[17]), 181538259)
|
|
self.assertEqual(
|
|
alignments.targets[17].annotations,
|
|
{
|
|
"MD5": "f7f05fb7ceea78cbc32ce652c540ff2d",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[18].id, "chr6")
|
|
self.assertEqual(len(alignments.targets[18]), 170805979)
|
|
self.assertEqual(
|
|
alignments.targets[18].annotations,
|
|
{
|
|
"MD5": "6a48dfa97e854e3c6f186c8ff973f7dd",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[19].id, "chr7")
|
|
self.assertEqual(len(alignments.targets[19]), 159345973)
|
|
self.assertEqual(
|
|
alignments.targets[19].annotations,
|
|
{
|
|
"MD5": "94eef2b96fd5a7c8db162c8c74378039",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[20].id, "chr8")
|
|
self.assertEqual(len(alignments.targets[20]), 145138636)
|
|
self.assertEqual(
|
|
alignments.targets[20].annotations,
|
|
{
|
|
"MD5": "c67955b5f7815a9a1edfaa15893d3616",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[21].id, "chr9")
|
|
self.assertEqual(len(alignments.targets[21]), 138394717)
|
|
self.assertEqual(
|
|
alignments.targets[21].annotations,
|
|
{
|
|
"MD5": "addd2795560986b7491c40b1faa3978a",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[22].id, "chrM")
|
|
self.assertEqual(len(alignments.targets[22]), 16569)
|
|
self.assertEqual(
|
|
alignments.targets[22].annotations,
|
|
{
|
|
"MD5": "c68f52674c9fb33aef52dcf399755519",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[23].id, "chrX")
|
|
self.assertEqual(len(alignments.targets[23]), 156040895)
|
|
self.assertEqual(
|
|
alignments.targets[23].annotations,
|
|
{
|
|
"MD5": "49527016a48497d9d1cbd8e4a9049bd3",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
self.assertEqual(alignments.targets[24].id, "chrY")
|
|
self.assertEqual(len(alignments.targets[24]), 57227415)
|
|
self.assertEqual(
|
|
alignments.targets[24].annotations,
|
|
{
|
|
"MD5": "b2b7e6369564d89059e763cd6e736837",
|
|
"assembly": "hg38",
|
|
"species": "Homo sapiens",
|
|
},
|
|
)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 1711))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr3")
|
|
self.assertEqual(alignment.query.id, "NR_046654.1")
|
|
self.assertEqual(len(alignment.target.seq), len(self.dna))
|
|
self.assertEqual(
|
|
alignment.target.seq.defined_ranges,
|
|
((42530895, 42530958), (42532020, 42532095), (42532563, 42532606)),
|
|
)
|
|
for start, end in alignment.target.seq.defined_ranges:
|
|
self.assertEqual(alignment.target.seq[start:end], self.dna[start:end])
|
|
self.assertEqual(alignment.query.seq, self.rna[alignment.query.id])
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[42530895, 42530958, 42532020, 42532095, 42532563, 42532606],
|
|
[ 181, 118, 118, 43, 43, 0]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.substitutions,
|
|
# fmt: off
|
|
np.array([[38., 0., 0., 0.],
|
|
[ 0., 41., 0., 0.],
|
|
[ 0., 0., 60., 0.],
|
|
[ 0., 0., 0., 42.],
|
|
])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(alignment.substitutions.alphabet, "ACGT")
|
|
self.assertEqual(alignment.mapq, 0)
|
|
matches = sum(
|
|
alignment.substitutions[c, c] for c in alignment.substitutions.alphabet
|
|
)
|
|
self.assertEqual(alignment.score, 1000)
|
|
self.assertEqual(alignment.annotations["NM"], 0)
|
|
self.assertNotIn("hard_clip_left", alignment.query.annotations)
|
|
self.assertNotIn("hard_clip_right", alignment.query.annotations)
|
|
self.assertEqual(alignment.operations, bytearray(b"MNMNM"))
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr3 42530895 CGGAAGTACTTCTGGGGGTACATACTCATCGGCTGGGGTATGGTACCAGGGAGGGCTTCC
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
NR_046654 181 CGGAAGTACTTCTGGGGGTACATACTCATCGGCTGGGGTATGGTACCAGGGAGGGCTTCC
|
|
|
|
chr3 42530955 AGG?????????????????????????????????????????????????????????
|
|
60 |||---------------------------------------------------------
|
|
NR_046654 121 AGG---------------------------------------------------------
|
|
|
|
chr3 42531015 ????????????????????????????????????????????????????????????
|
|
120 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531075 ????????????????????????????????????????????????????????????
|
|
180 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531135 ????????????????????????????????????????????????????????????
|
|
240 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531195 ????????????????????????????????????????????????????????????
|
|
300 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531255 ????????????????????????????????????????????????????????????
|
|
360 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531315 ????????????????????????????????????????????????????????????
|
|
420 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531375 ????????????????????????????????????????????????????????????
|
|
480 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531435 ????????????????????????????????????????????????????????????
|
|
540 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531495 ????????????????????????????????????????????????????????????
|
|
600 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531555 ????????????????????????????????????????????????????????????
|
|
660 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531615 ????????????????????????????????????????????????????????????
|
|
720 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531675 ????????????????????????????????????????????????????????????
|
|
780 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531735 ????????????????????????????????????????????????????????????
|
|
840 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531795 ????????????????????????????????????????????????????????????
|
|
900 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531855 ????????????????????????????????????????????????????????????
|
|
960 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531915 ????????????????????????????????????????????????????????????
|
|
1020 ------------------------------------------------------------
|
|
NR_046654 118 ------------------------------------------------------------
|
|
|
|
chr3 42531975 ?????????????????????????????????????????????CAGTTCTTCCTTGAG
|
|
1080 ---------------------------------------------|||||||||||||||
|
|
NR_046654 118 ---------------------------------------------CAGTTCTTCCTTGAG
|
|
|
|
chr3 42532035 CGTAAGCGGATTGGGAGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGATGAC
|
|
1140 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
NR_046654 103 CGTAAGCGGATTGGGAGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGATGAC
|
|
|
|
chr3 42532095 ????????????????????????????????????????????????????????????
|
|
1200 ------------------------------------------------------------
|
|
NR_046654 43 ------------------------------------------------------------
|
|
|
|
chr3 42532155 ????????????????????????????????????????????????????????????
|
|
1260 ------------------------------------------------------------
|
|
NR_046654 43 ------------------------------------------------------------
|
|
|
|
chr3 42532215 ????????????????????????????????????????????????????????????
|
|
1320 ------------------------------------------------------------
|
|
NR_046654 43 ------------------------------------------------------------
|
|
|
|
chr3 42532275 ????????????????????????????????????????????????????????????
|
|
1380 ------------------------------------------------------------
|
|
NR_046654 43 ------------------------------------------------------------
|
|
|
|
chr3 42532335 ????????????????????????????????????????????????????????????
|
|
1440 ------------------------------------------------------------
|
|
NR_046654 43 ------------------------------------------------------------
|
|
|
|
chr3 42532395 ????????????????????????????????????????????????????????????
|
|
1500 ------------------------------------------------------------
|
|
NR_046654 43 ------------------------------------------------------------
|
|
|
|
chr3 42532455 ????????????????????????????????????????????????????????????
|
|
1560 ------------------------------------------------------------
|
|
NR_046654 43 ------------------------------------------------------------
|
|
|
|
chr3 42532515 ????????????????????????????????????????????????CTAGCATCCTTC
|
|
1620 ------------------------------------------------||||||||||||
|
|
NR_046654 43 ------------------------------------------------CTAGCATCCTTC
|
|
|
|
chr3 42532575 CCAGGTATGCATCTGCTGCCAAGCCAGGGAG 42532606
|
|
1680 ||||||||||||||||||||||||||||||| 1711
|
|
NR_046654 31 CCAGGTATGCATCTGCTGCCAAGCCAGGGAG 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
NR_046654.1 16 chr3 42530896 0 63M1062N75M468N43M * 0 0 CGGAAGTACTTCTGGGGGTACATACTCATCGGCTGGGGTATGGTACCAGGGAGGGCTTCCAGGCAGTTCTTCCTTGAGCGTAAGCGGATTGGGAGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGATGACCTAGCATCCTTCCCAGGTATGCATCTGCTGCCAAGCCAGGGAG * AS:i:1000 NM:i:0
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (181 aligned letters; 181 identities; 0 mismatches; 1530 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 181:
|
|
identities = 181,
|
|
mismatches = 0.
|
|
gaps = 1530:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 1530:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 1530:
|
|
open_internal_deletions = 2,
|
|
extend_internal_deletions = 1528;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 1530)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 1530)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 1530)
|
|
self.assertEqual(counts.gaps, 1530)
|
|
self.assertEqual(counts.aligned, 181)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 1714))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr3")
|
|
self.assertEqual(alignment.query.id, "NR_046654.1_modified")
|
|
self.assertEqual(len(alignment.target.seq), len(self.dna))
|
|
self.assertEqual(
|
|
alignment.target.seq.defined_ranges,
|
|
((42530895, 42530958), (42532020, 42532095), (42532563, 42532606)),
|
|
)
|
|
for start, end in alignment.target.seq.defined_ranges:
|
|
self.assertEqual(alignment.target.seq[start:end], self.dna[start:end])
|
|
self.assertEqual(alignment.query.seq, self.rna[alignment.query.id])
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[42530895, 42530922, 42530922, 42530958, 42532020,
|
|
42532037, 42532039, 42532095, 42532563, 42532606],
|
|
[ 185, 158, 155, 119, 119,
|
|
102, 102, 46, 46, 3],
|
|
])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.substitutions,
|
|
# fmt: off
|
|
np.array([[36., 0., 0., 1.],
|
|
[ 0., 41., 0., 0.],
|
|
[ 0., 0., 60., 0.],
|
|
[ 0., 0., 0., 41.],
|
|
]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(alignment.substitutions.alphabet, "ACGT")
|
|
self.assertEqual(alignment.mapq, 0)
|
|
self.assertEqual(alignment.score, 978)
|
|
self.assertEqual(alignment.annotations["NM"], 6)
|
|
self.assertNotIn("hard_clip_left", alignment.query.annotations)
|
|
self.assertNotIn("hard_clip_right", alignment.query.annotations)
|
|
self.assertEqual(alignment.operations, bytearray(b"MIMNMDMNM"))
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr3 42530895 CGGAAGTACTTCTGGGGGTACATACTC---ATCGGCTGGGGTATGGTACCAGGGAGGGCT
|
|
0 |||||||||||||||||||||||||||---||||||||||||||||||||||||||||||
|
|
NR_046654 185 CGGAAGTACTTCTGGGGGTACATACTCCCCATCGGCTGGGGTATGGTACCAGGGAGGGCT
|
|
|
|
chr3 42530952 TCCAGG??????????????????????????????????????????????????????
|
|
60 ||||||------------------------------------------------------
|
|
NR_046654 125 TCCAGG------------------------------------------------------
|
|
|
|
chr3 42531012 ????????????????????????????????????????????????????????????
|
|
120 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531072 ????????????????????????????????????????????????????????????
|
|
180 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531132 ????????????????????????????????????????????????????????????
|
|
240 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531192 ????????????????????????????????????????????????????????????
|
|
300 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531252 ????????????????????????????????????????????????????????????
|
|
360 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531312 ????????????????????????????????????????????????????????????
|
|
420 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531372 ????????????????????????????????????????????????????????????
|
|
480 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531432 ????????????????????????????????????????????????????????????
|
|
540 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531492 ????????????????????????????????????????????????????????????
|
|
600 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531552 ????????????????????????????????????????????????????????????
|
|
660 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531612 ????????????????????????????????????????????????????????????
|
|
720 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531672 ????????????????????????????????????????????????????????????
|
|
780 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531732 ????????????????????????????????????????????????????????????
|
|
840 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531792 ????????????????????????????????????????????????????????????
|
|
900 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531852 ????????????????????????????????????????????????????????????
|
|
960 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531912 ????????????????????????????????????????????????????????????
|
|
1020 ------------------------------------------------------------
|
|
NR_046654 119 ------------------------------------------------------------
|
|
|
|
chr3 42531972 ????????????????????????????????????????????????CAGTTCTTCCTT
|
|
1080 ------------------------------------------------||||||||||||
|
|
NR_046654 119 ------------------------------------------------CAGTTCTTCCTT
|
|
|
|
chr3 42532032 GAGCGTAAGCGGATTGGGAGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGAT
|
|
1140 |||||--|||||||||||.|||||||||||||||||||||||||||||||||||||||||
|
|
NR_046654 107 GAGCG--AGCGGATTGGGTGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGAT
|
|
|
|
chr3 42532092 GAC?????????????????????????????????????????????????????????
|
|
1200 |||---------------------------------------------------------
|
|
NR_046654 49 GAC---------------------------------------------------------
|
|
|
|
chr3 42532152 ????????????????????????????????????????????????????????????
|
|
1260 ------------------------------------------------------------
|
|
NR_046654 46 ------------------------------------------------------------
|
|
|
|
chr3 42532212 ????????????????????????????????????????????????????????????
|
|
1320 ------------------------------------------------------------
|
|
NR_046654 46 ------------------------------------------------------------
|
|
|
|
chr3 42532272 ????????????????????????????????????????????????????????????
|
|
1380 ------------------------------------------------------------
|
|
NR_046654 46 ------------------------------------------------------------
|
|
|
|
chr3 42532332 ????????????????????????????????????????????????????????????
|
|
1440 ------------------------------------------------------------
|
|
NR_046654 46 ------------------------------------------------------------
|
|
|
|
chr3 42532392 ????????????????????????????????????????????????????????????
|
|
1500 ------------------------------------------------------------
|
|
NR_046654 46 ------------------------------------------------------------
|
|
|
|
chr3 42532452 ????????????????????????????????????????????????????????????
|
|
1560 ------------------------------------------------------------
|
|
NR_046654 46 ------------------------------------------------------------
|
|
|
|
chr3 42532512 ???????????????????????????????????????????????????CTAGCATCC
|
|
1620 ---------------------------------------------------|||||||||
|
|
NR_046654 46 ---------------------------------------------------CTAGCATCC
|
|
|
|
chr3 42532572 TTCCCAGGTATGCATCTGCTGCCAAGCCAGGGAG 42532606
|
|
1680 |||||||||||||||||||||||||||||||||| 1714
|
|
NR_046654 37 TTCCCAGGTATGCATCTGCTGCCAAGCCAGGGAG 3
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
NR_046654.1_modified 16 chr3 42530896 0 5S27M3I36M1062N17M2D56M468N43M3S * 0 0 AAAAACGGAAGTACTTCTGGGGGTACATACTCCCCATCGGCTGGGGTATGGTACCAGGGAGGGCTTCCAGGCAGTTCTTCCTTGAGCGAGCGGATTGGGTGCACAGTCCTTAGGGATTTGAAGGAGGTAGAGTTCCCGGATGACCTAGCATCCTTCCCAGGTATGCATCTGCTGCCAAGCCAGGGAGAAA * AS:i:978 NM:i:6
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (179 aligned letters; 178 identities; 1 mismatches; 1535 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 179:
|
|
identities = 178,
|
|
mismatches = 1.
|
|
gaps = 1535:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 1535:
|
|
internal_insertions = 3:
|
|
open_internal_insertions = 1,
|
|
extend_internal_insertions = 2;
|
|
internal_deletions = 1532:
|
|
open_internal_deletions = 3,
|
|
extend_internal_deletions = 1529;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 3)
|
|
self.assertEqual(counts.internal_deletions, 1532)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 1535)
|
|
self.assertEqual(counts.insertions, 3)
|
|
self.assertEqual(counts.deletions, 1532)
|
|
self.assertEqual(counts.gaps, 1535)
|
|
self.assertEqual(counts.aligned, 179)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 5407))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr3")
|
|
self.assertEqual(alignment.query.id, "NR_111921.1")
|
|
self.assertEqual(len(alignment.target.seq), len(self.dna))
|
|
self.assertEqual(
|
|
alignment.target.seq.defined_ranges,
|
|
((48663767, 48663813), (48665640, 48665722), (48669098, 48669174)),
|
|
)
|
|
for start, end in alignment.target.seq.defined_ranges:
|
|
self.assertEqual(alignment.target.seq[start:end], self.dna[start:end])
|
|
self.assertEqual(alignment.query.seq, self.rna[alignment.query.id])
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array( [[48663767, 48663813, 48665640, 48665722, 48669098, 48669174],
|
|
[ 0, 46, 46, 128, 128, 204]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.substitutions,
|
|
# fmt: off
|
|
np.array([[62., 0., 0., 0.],
|
|
[ 0., 42., 0., 0.],
|
|
[ 0., 0., 66., 0.],
|
|
[ 0., 0., 0., 34.],
|
|
])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(alignment.substitutions.alphabet, "ACGT")
|
|
self.assertEqual(alignment.mapq, 0)
|
|
self.assertEqual(alignment.score, 1000)
|
|
self.assertEqual(alignment.annotations["NM"], 0)
|
|
self.assertNotIn("hard_clip_left", alignment.query.annotations)
|
|
self.assertEqual(alignment.query.annotations["hard_clip_right"], 12)
|
|
self.assertEqual(alignment.operations, bytearray(b"MNMNM"))
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr3 48663767 CACGAGAGGAGCGGAGGCGAGGGGTGAACGCGGAGCACTCCAATCG??????????????
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||--------------
|
|
NR_111921 0 CACGAGAGGAGCGGAGGCGAGGGGTGAACGCGGAGCACTCCAATCG--------------
|
|
|
|
chr3 48663827 ????????????????????????????????????????????????????????????
|
|
60 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48663887 ????????????????????????????????????????????????????????????
|
|
120 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48663947 ????????????????????????????????????????????????????????????
|
|
180 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664007 ????????????????????????????????????????????????????????????
|
|
240 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664067 ????????????????????????????????????????????????????????????
|
|
300 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664127 ????????????????????????????????????????????????????????????
|
|
360 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664187 ????????????????????????????????????????????????????????????
|
|
420 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664247 ????????????????????????????????????????????????????????????
|
|
480 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664307 ????????????????????????????????????????????????????????????
|
|
540 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664367 ????????????????????????????????????????????????????????????
|
|
600 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664427 ????????????????????????????????????????????????????????????
|
|
660 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664487 ????????????????????????????????????????????????????????????
|
|
720 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664547 ????????????????????????????????????????????????????????????
|
|
780 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664607 ????????????????????????????????????????????????????????????
|
|
840 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664667 ????????????????????????????????????????????????????????????
|
|
900 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664727 ????????????????????????????????????????????????????????????
|
|
960 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664787 ????????????????????????????????????????????????????????????
|
|
1020 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664847 ????????????????????????????????????????????????????????????
|
|
1080 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664907 ????????????????????????????????????????????????????????????
|
|
1140 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48664967 ????????????????????????????????????????????????????????????
|
|
1200 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48665027 ????????????????????????????????????????????????????????????
|
|
1260 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48665087 ????????????????????????????????????????????????????????????
|
|
1320 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48665147 ????????????????????????????????????????????????????????????
|
|
1380 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48665207 ????????????????????????????????????????????????????????????
|
|
1440 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48665267 ????????????????????????????????????????????????????????????
|
|
1500 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48665327 ????????????????????????????????????????????????????????????
|
|
1560 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48665387 ????????????????????????????????????????????????????????????
|
|
1620 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48665447 ????????????????????????????????????????????????????????????
|
|
1680 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48665507 ????????????????????????????????????????????????????????????
|
|
1740 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48665567 ????????????????????????????????????????????????????????????
|
|
1800 ------------------------------------------------------------
|
|
NR_111921 46 ------------------------------------------------------------
|
|
|
|
chr3 48665627 ?????????????CTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGC
|
|
1860 -------------|||||||||||||||||||||||||||||||||||||||||||||||
|
|
NR_111921 46 -------------CTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGC
|
|
|
|
chr3 48665687 TGCTGGGCGGCAGATGGAGCGATCAGAAGACCAGG?????????????????????????
|
|
1920 |||||||||||||||||||||||||||||||||||-------------------------
|
|
NR_111921 93 TGCTGGGCGGCAGATGGAGCGATCAGAAGACCAGG-------------------------
|
|
|
|
chr3 48665747 ????????????????????????????????????????????????????????????
|
|
1980 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48665807 ????????????????????????????????????????????????????????????
|
|
2040 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48665867 ????????????????????????????????????????????????????????????
|
|
2100 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48665927 ????????????????????????????????????????????????????????????
|
|
2160 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48665987 ????????????????????????????????????????????????????????????
|
|
2220 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666047 ????????????????????????????????????????????????????????????
|
|
2280 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666107 ????????????????????????????????????????????????????????????
|
|
2340 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666167 ????????????????????????????????????????????????????????????
|
|
2400 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666227 ????????????????????????????????????????????????????????????
|
|
2460 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666287 ????????????????????????????????????????????????????????????
|
|
2520 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666347 ????????????????????????????????????????????????????????????
|
|
2580 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666407 ????????????????????????????????????????????????????????????
|
|
2640 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666467 ????????????????????????????????????????????????????????????
|
|
2700 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666527 ????????????????????????????????????????????????????????????
|
|
2760 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666587 ????????????????????????????????????????????????????????????
|
|
2820 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666647 ????????????????????????????????????????????????????????????
|
|
2880 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666707 ????????????????????????????????????????????????????????????
|
|
2940 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666767 ????????????????????????????????????????????????????????????
|
|
3000 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666827 ????????????????????????????????????????????????????????????
|
|
3060 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666887 ????????????????????????????????????????????????????????????
|
|
3120 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48666947 ????????????????????????????????????????????????????????????
|
|
3180 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667007 ????????????????????????????????????????????????????????????
|
|
3240 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667067 ????????????????????????????????????????????????????????????
|
|
3300 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667127 ????????????????????????????????????????????????????????????
|
|
3360 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667187 ????????????????????????????????????????????????????????????
|
|
3420 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667247 ????????????????????????????????????????????????????????????
|
|
3480 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667307 ????????????????????????????????????????????????????????????
|
|
3540 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667367 ????????????????????????????????????????????????????????????
|
|
3600 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667427 ????????????????????????????????????????????????????????????
|
|
3660 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667487 ????????????????????????????????????????????????????????????
|
|
3720 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667547 ????????????????????????????????????????????????????????????
|
|
3780 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667607 ????????????????????????????????????????????????????????????
|
|
3840 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667667 ????????????????????????????????????????????????????????????
|
|
3900 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667727 ????????????????????????????????????????????????????????????
|
|
3960 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667787 ????????????????????????????????????????????????????????????
|
|
4020 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667847 ????????????????????????????????????????????????????????????
|
|
4080 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667907 ????????????????????????????????????????????????????????????
|
|
4140 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48667967 ????????????????????????????????????????????????????????????
|
|
4200 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668027 ????????????????????????????????????????????????????????????
|
|
4260 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668087 ????????????????????????????????????????????????????????????
|
|
4320 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668147 ????????????????????????????????????????????????????????????
|
|
4380 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668207 ????????????????????????????????????????????????????????????
|
|
4440 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668267 ????????????????????????????????????????????????????????????
|
|
4500 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668327 ????????????????????????????????????????????????????????????
|
|
4560 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668387 ????????????????????????????????????????????????????????????
|
|
4620 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668447 ????????????????????????????????????????????????????????????
|
|
4680 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668507 ????????????????????????????????????????????????????????????
|
|
4740 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668567 ????????????????????????????????????????????????????????????
|
|
4800 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668627 ????????????????????????????????????????????????????????????
|
|
4860 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668687 ????????????????????????????????????????????????????????????
|
|
4920 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668747 ????????????????????????????????????????????????????????????
|
|
4980 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668807 ????????????????????????????????????????????????????????????
|
|
5040 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668867 ????????????????????????????????????????????????????????????
|
|
5100 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668927 ????????????????????????????????????????????????????????????
|
|
5160 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48668987 ????????????????????????????????????????????????????????????
|
|
5220 ------------------------------------------------------------
|
|
NR_111921 128 ------------------------------------------------------------
|
|
|
|
chr3 48669047 ???????????????????????????????????????????????????AGACGGGAG
|
|
5280 ---------------------------------------------------|||||||||
|
|
NR_111921 128 ---------------------------------------------------AGACGGGAG
|
|
|
|
chr3 48669107 CTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTATAT
|
|
5340 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
NR_111921 137 CTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTATAT
|
|
|
|
chr3 48669167 TAAAAAA 48669174
|
|
5400 ||||||| 5407
|
|
NR_111921 197 TAAAAAA 204
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
NR_111921.1 0 chr3 48663768 0 46M1827N82M3376N76M12H * 0 0 CACGAGAGGAGCGGAGGCGAGGGGTGAACGCGGAGCACTCCAATCGCTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGCTGCTGGGCGGCAGATGGAGCGATCAGAAGACCAGGAGACGGGAGCTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTATATTAAAAAA * AS:i:1000 NM:i:0
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (204 aligned letters; 204 identities; 0 mismatches; 5203 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 204:
|
|
identities = 204,
|
|
mismatches = 0.
|
|
gaps = 5203:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 5203:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 5203:
|
|
open_internal_deletions = 2,
|
|
extend_internal_deletions = 5201;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 5203)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 5203)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 5203)
|
|
self.assertEqual(counts.gaps, 5203)
|
|
self.assertEqual(counts.aligned, 204)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 5409))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr3")
|
|
self.assertEqual(alignment.query.id, "NR_111921.1_modified")
|
|
self.assertEqual(len(alignment.target.seq), len(self.dna))
|
|
self.assertEqual(
|
|
alignment.target.seq.defined_ranges,
|
|
((48663767, 48663813), (48665640, 48665722), (48669098, 48669174)),
|
|
)
|
|
for start, end in alignment.target.seq.defined_ranges:
|
|
self.assertEqual(alignment.target.seq[start:end], self.dna[start:end])
|
|
self.assertEqual(alignment.query.seq, self.rna[alignment.query.id])
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[48663767, 48663795, 48663796, 48663813, 48665640,
|
|
48665716, 48665716, 48665722, 48669098, 48669174],
|
|
[ 3, 31, 31, 48, 48,
|
|
124, 126, 132, 132, 208],
|
|
])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.substitutions,
|
|
# fmt: off
|
|
np.array([[62., 0., 0., 0.],
|
|
[ 0., 41., 0., 0.],
|
|
[ 0., 2., 64., 0.],
|
|
[ 0., 0., 0., 34.],
|
|
]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(alignment.substitutions.alphabet, "ACGT")
|
|
self.assertEqual(alignment.mapq, 0)
|
|
self.assertEqual(alignment.score, 972)
|
|
self.assertEqual(alignment.annotations["NM"], 5)
|
|
self.assertNotIn("hard_clip_left", alignment.query.annotations)
|
|
self.assertEqual(alignment.query.annotations["hard_clip_right"], 12)
|
|
self.assertEqual(alignment.operations, bytearray(b"MDMNMIMNM"))
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr3 48663767 CACGAGAGGAGCGGAGGCGAGGGGTGAACGCGGAGCACTCCAATCG??????????????
|
|
0 ||||||||||||||||||||||||||||-|||||||||||||||||--------------
|
|
NR_111921 3 CACGAGAGGAGCGGAGGCGAGGGGTGAA-GCGGAGCACTCCAATCG--------------
|
|
|
|
chr3 48663827 ????????????????????????????????????????????????????????????
|
|
60 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48663887 ????????????????????????????????????????????????????????????
|
|
120 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48663947 ????????????????????????????????????????????????????????????
|
|
180 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664007 ????????????????????????????????????????????????????????????
|
|
240 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664067 ????????????????????????????????????????????????????????????
|
|
300 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664127 ????????????????????????????????????????????????????????????
|
|
360 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664187 ????????????????????????????????????????????????????????????
|
|
420 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664247 ????????????????????????????????????????????????????????????
|
|
480 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664307 ????????????????????????????????????????????????????????????
|
|
540 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664367 ????????????????????????????????????????????????????????????
|
|
600 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664427 ????????????????????????????????????????????????????????????
|
|
660 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664487 ????????????????????????????????????????????????????????????
|
|
720 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664547 ????????????????????????????????????????????????????????????
|
|
780 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664607 ????????????????????????????????????????????????????????????
|
|
840 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664667 ????????????????????????????????????????????????????????????
|
|
900 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664727 ????????????????????????????????????????????????????????????
|
|
960 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664787 ????????????????????????????????????????????????????????????
|
|
1020 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664847 ????????????????????????????????????????????????????????????
|
|
1080 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664907 ????????????????????????????????????????????????????????????
|
|
1140 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48664967 ????????????????????????????????????????????????????????????
|
|
1200 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48665027 ????????????????????????????????????????????????????????????
|
|
1260 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48665087 ????????????????????????????????????????????????????????????
|
|
1320 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48665147 ????????????????????????????????????????????????????????????
|
|
1380 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48665207 ????????????????????????????????????????????????????????????
|
|
1440 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48665267 ????????????????????????????????????????????????????????????
|
|
1500 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48665327 ????????????????????????????????????????????????????????????
|
|
1560 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48665387 ????????????????????????????????????????????????????????????
|
|
1620 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48665447 ????????????????????????????????????????????????????????????
|
|
1680 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48665507 ????????????????????????????????????????????????????????????
|
|
1740 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48665567 ????????????????????????????????????????????????????????????
|
|
1800 ------------------------------------------------------------
|
|
NR_111921 48 ------------------------------------------------------------
|
|
|
|
chr3 48665627 ?????????????CTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGC
|
|
1860 -------------|||||||||||||||||||||||||||||||||||||||||||||||
|
|
NR_111921 48 -------------CTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGC
|
|
|
|
chr3 48665687 TGCTGGGCGGCAGATGGAGCGATCAGAAG--ACCAGG???????????????????????
|
|
1920 |||||..||||||||||||||||||||||--||||||-----------------------
|
|
NR_111921 95 TGCTGCCCGGCAGATGGAGCGATCAGAAGCCACCAGG-----------------------
|
|
|
|
chr3 48665745 ????????????????????????????????????????????????????????????
|
|
1980 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48665805 ????????????????????????????????????????????????????????????
|
|
2040 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48665865 ????????????????????????????????????????????????????????????
|
|
2100 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48665925 ????????????????????????????????????????????????????????????
|
|
2160 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48665985 ????????????????????????????????????????????????????????????
|
|
2220 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666045 ????????????????????????????????????????????????????????????
|
|
2280 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666105 ????????????????????????????????????????????????????????????
|
|
2340 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666165 ????????????????????????????????????????????????????????????
|
|
2400 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666225 ????????????????????????????????????????????????????????????
|
|
2460 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666285 ????????????????????????????????????????????????????????????
|
|
2520 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666345 ????????????????????????????????????????????????????????????
|
|
2580 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666405 ????????????????????????????????????????????????????????????
|
|
2640 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666465 ????????????????????????????????????????????????????????????
|
|
2700 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666525 ????????????????????????????????????????????????????????????
|
|
2760 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666585 ????????????????????????????????????????????????????????????
|
|
2820 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666645 ????????????????????????????????????????????????????????????
|
|
2880 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666705 ????????????????????????????????????????????????????????????
|
|
2940 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666765 ????????????????????????????????????????????????????????????
|
|
3000 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666825 ????????????????????????????????????????????????????????????
|
|
3060 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666885 ????????????????????????????????????????????????????????????
|
|
3120 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48666945 ????????????????????????????????????????????????????????????
|
|
3180 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667005 ????????????????????????????????????????????????????????????
|
|
3240 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667065 ????????????????????????????????????????????????????????????
|
|
3300 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667125 ????????????????????????????????????????????????????????????
|
|
3360 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667185 ????????????????????????????????????????????????????????????
|
|
3420 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667245 ????????????????????????????????????????????????????????????
|
|
3480 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667305 ????????????????????????????????????????????????????????????
|
|
3540 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667365 ????????????????????????????????????????????????????????????
|
|
3600 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667425 ????????????????????????????????????????????????????????????
|
|
3660 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667485 ????????????????????????????????????????????????????????????
|
|
3720 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667545 ????????????????????????????????????????????????????????????
|
|
3780 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667605 ????????????????????????????????????????????????????????????
|
|
3840 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667665 ????????????????????????????????????????????????????????????
|
|
3900 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667725 ????????????????????????????????????????????????????????????
|
|
3960 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667785 ????????????????????????????????????????????????????????????
|
|
4020 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667845 ????????????????????????????????????????????????????????????
|
|
4080 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667905 ????????????????????????????????????????????????????????????
|
|
4140 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48667965 ????????????????????????????????????????????????????????????
|
|
4200 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668025 ????????????????????????????????????????????????????????????
|
|
4260 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668085 ????????????????????????????????????????????????????????????
|
|
4320 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668145 ????????????????????????????????????????????????????????????
|
|
4380 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668205 ????????????????????????????????????????????????????????????
|
|
4440 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668265 ????????????????????????????????????????????????????????????
|
|
4500 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668325 ????????????????????????????????????????????????????????????
|
|
4560 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668385 ????????????????????????????????????????????????????????????
|
|
4620 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668445 ????????????????????????????????????????????????????????????
|
|
4680 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668505 ????????????????????????????????????????????????????????????
|
|
4740 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668565 ????????????????????????????????????????????????????????????
|
|
4800 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668625 ????????????????????????????????????????????????????????????
|
|
4860 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668685 ????????????????????????????????????????????????????????????
|
|
4920 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668745 ????????????????????????????????????????????????????????????
|
|
4980 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668805 ????????????????????????????????????????????????????????????
|
|
5040 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668865 ????????????????????????????????????????????????????????????
|
|
5100 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668925 ????????????????????????????????????????????????????????????
|
|
5160 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48668985 ????????????????????????????????????????????????????????????
|
|
5220 ------------------------------------------------------------
|
|
NR_111921 132 ------------------------------------------------------------
|
|
|
|
chr3 48669045 ?????????????????????????????????????????????????????AGACGGG
|
|
5280 -----------------------------------------------------|||||||
|
|
NR_111921 132 -----------------------------------------------------AGACGGG
|
|
|
|
chr3 48669105 AGCTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTAT
|
|
5340 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
NR_111921 139 AGCTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTAT
|
|
|
|
chr3 48669165 ATTAAAAAA 48669174
|
|
5400 ||||||||| 5409
|
|
NR_111921 199 ATTAAAAAA 208
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
NR_111921.1_modified 0 chr3 48663768 0 3S28M1D17M1827N76M2I6M3376N76M12H * 0 0 AAACACGAGAGGAGCGGAGGCGAGGGGTGAAGCGGAGCACTCCAATCGCTCCCAACTAGAGGTCCACCCAGGACCCAGAGACCTGGATTTGAGGCTGCTGCCCGGCAGATGGAGCGATCAGAAGCCACCAGGAGACGGGAGCTGGAGTGCAGTGGCTGTTCACAAGCGTGAAAGCAAAGATTAAAAAATTTGTTTTTATATTAAAAAA * AS:i:972 NM:i:5
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (203 aligned letters; 201 identities; 2 mismatches; 5206 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 203:
|
|
identities = 201,
|
|
mismatches = 2.
|
|
gaps = 5206:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 5206:
|
|
internal_insertions = 2:
|
|
open_internal_insertions = 1,
|
|
extend_internal_insertions = 1;
|
|
internal_deletions = 5204:
|
|
open_internal_deletions = 3,
|
|
extend_internal_deletions = 5201;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 2)
|
|
self.assertEqual(counts.internal_deletions, 5204)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 5206)
|
|
self.assertEqual(counts.insertions, 2)
|
|
self.assertEqual(counts.deletions, 5204)
|
|
self.assertEqual(counts.gaps, 5206)
|
|
self.assertEqual(counts.aligned, 203)
|
|
self.assertRaises(StopIteration, next, alignments)
|
|
|
|
def test_reading(self):
    """Parse dna_rna.sam through several access patterns and check each.

    The alignments are checked on the parser returned by ``Align.parse``,
    on the result of calling ``iter()`` on that parser, and on a parser
    used as a context manager. After a ``with`` block exits, accessing the
    parser's ``_stream`` attribute is expected to raise AttributeError,
    whether or not the parser was iterated inside the block.
    """
    sam_path = "Blat/dna_rna.sam"

    # Direct use of the object returned by Align.parse.
    parsed = Align.parse(sam_path, "sam")
    self.check_alignments(parsed)

    # The same checks are repeated on iter() of the already-used parser.
    restarted = iter(parsed)
    self.check_alignments(restarted)

    # Parser used as a context manager and fully checked inside the block.
    with Align.parse(sam_path, "sam") as managed:
        self.check_alignments(managed)
    self.assertRaises(AttributeError, getattr, managed, "_stream")

    # Parser used as a context manager without reading anything from it.
    with Align.parse(sam_path, "sam") as untouched:
        pass
    self.assertRaises(AttributeError, getattr, untouched, "_stream")
|
|
|
|
def test_reading_psl_comparison(self):
    """Test parsing dna_rna.sam and comparing to dna_rna.psl.

    The SAM and PSL files are read side by side; each pair of alignments
    must agree on the target id, the query id, and the coordinates array.
    """
    sam_path = "Blat/dna_rna.sam"
    psl_path = "Blat/dna_rna.psl"
    # Use the parsers as context managers so both are closed when the
    # comparison finishes or an assertion fails. zip() stops at the shorter
    # input, so the other parser may never be read to its end.
    with Align.parse(sam_path, "sam") as sam_alignments:
        with Align.parse(psl_path, "psl") as psl_alignments:
            for sam_alignment, psl_alignment in zip(sam_alignments, psl_alignments):
                self.assertEqual(sam_alignment.target.id, psl_alignment.target.id)
                self.assertEqual(sam_alignment.query.id, psl_alignment.query.id)
                self.assertTrue(
                    np.array_equal(
                        sam_alignment.coordinates, psl_alignment.coordinates
                    )
                )
|
|
|
|
def test_writing(self):
    """Round-trip the dna_rna.sam alignments through an in-memory buffer.

    The alignments are written in SAM format (with ``md=True``) to a
    StringIO object; the write must report 4 alignments. The buffer is then
    parsed again and the result is run through ``check_alignments``.
    """
    source = Align.parse("Blat/dna_rna.sam", "sam")
    buffer = StringIO()
    written = Align.write(source, buffer, "sam", md=True)
    self.assertEqual(written, 4)
    # Rewind so the parser reads the text that was just written.
    buffer.seek(0)
    self.check_alignments(Align.parse(buffer, "sam"))
    buffer.close()
|
|
|
|
|
|
class TestAlign_dna(unittest.TestCase):
|
|
# The SAM files were generated using these commands:
|
|
# twoBitInfo hg19.2bit stdout | grep -v chrUn | grep -v _random | grep -v _hap | sort -n -k 2 -r > hg19.chrom.sizes
|
|
|
|
# psl2sam.pl psl_34_001.psl | samtools view -h -t hg19.chrom.sizes - > psl_34_001.sam
|
|
# psl2sam.pl psl_34_003.psl | samtools view -h -t hg19.chrom.sizes - > psl_34_003.sam
|
|
# psl2sam.pl psl_34_004.psl | samtools view -h -t hg19.chrom.sizes - > psl_34_004.sam
|
|
# psl2sam.pl psl_34_005.psl | samtools view -h -t hg19.chrom.sizes - > psl_34_005.sam
|
|
|
|
# Note that psl_34_002 was not included as the SAM format no longer allows
|
|
# an empty SAM file.
|
|
|
|
# The hard clipping symbols H were replaced by soft clipping symbols S in
|
|
# the file psl_34_005.sam.
|
|
|
|
def check_alignments_psl_34_001(self, alignments):
|
|
"""Check the alignments for psl_34_001/sam."""
|
|
self.assertEqual(list(alignments.metadata), ["PG"])
|
|
self.assertEqual(len(alignments.targets), 25)
|
|
self.assertEqual(alignments.targets[0].id, "chr1")
|
|
self.assertEqual(len(alignments.targets[0]), 249250621)
|
|
self.assertEqual(alignments.targets[1].id, "chr2")
|
|
self.assertEqual(len(alignments.targets[1]), 243199373)
|
|
self.assertEqual(alignments.targets[2].id, "chr3")
|
|
self.assertEqual(len(alignments.targets[2]), 198022430)
|
|
self.assertEqual(alignments.targets[3].id, "chr4")
|
|
self.assertEqual(len(alignments.targets[3]), 191154276)
|
|
self.assertEqual(alignments.targets[4].id, "chr5")
|
|
self.assertEqual(len(alignments.targets[4]), 180915260)
|
|
self.assertEqual(alignments.targets[5].id, "chr6")
|
|
self.assertEqual(len(alignments.targets[5]), 171115067)
|
|
self.assertEqual(alignments.targets[6].id, "chr7")
|
|
self.assertEqual(len(alignments.targets[6]), 159138663)
|
|
self.assertEqual(alignments.targets[7].id, "chrX")
|
|
self.assertEqual(len(alignments.targets[7]), 155270560)
|
|
self.assertEqual(alignments.targets[8].id, "chr8")
|
|
self.assertEqual(len(alignments.targets[8]), 146364022)
|
|
self.assertEqual(alignments.targets[9].id, "chr9")
|
|
self.assertEqual(len(alignments.targets[9]), 141213431)
|
|
self.assertEqual(alignments.targets[10].id, "chr10")
|
|
self.assertEqual(len(alignments.targets[10]), 135534747)
|
|
self.assertEqual(alignments.targets[11].id, "chr11")
|
|
self.assertEqual(len(alignments.targets[11]), 135006516)
|
|
self.assertEqual(alignments.targets[12].id, "chr12")
|
|
self.assertEqual(len(alignments.targets[12]), 133851895)
|
|
self.assertEqual(alignments.targets[13].id, "chr13")
|
|
self.assertEqual(len(alignments.targets[13]), 115169878)
|
|
self.assertEqual(alignments.targets[14].id, "chr14")
|
|
self.assertEqual(len(alignments.targets[14]), 107349540)
|
|
self.assertEqual(alignments.targets[15].id, "chr15")
|
|
self.assertEqual(len(alignments.targets[15]), 102531392)
|
|
self.assertEqual(alignments.targets[16].id, "chr16")
|
|
self.assertEqual(len(alignments.targets[16]), 90354753)
|
|
self.assertEqual(alignments.targets[17].id, "chr17")
|
|
self.assertEqual(len(alignments.targets[17]), 81195210)
|
|
self.assertEqual(alignments.targets[18].id, "chr18")
|
|
self.assertEqual(len(alignments.targets[18]), 78077248)
|
|
self.assertEqual(alignments.targets[19].id, "chr20")
|
|
self.assertEqual(len(alignments.targets[19]), 63025520)
|
|
self.assertEqual(alignments.targets[20].id, "chrY")
|
|
self.assertEqual(len(alignments.targets[20]), 59373566)
|
|
self.assertEqual(alignments.targets[21].id, "chr19")
|
|
self.assertEqual(len(alignments.targets[21]), 59128983)
|
|
self.assertEqual(alignments.targets[22].id, "chr22")
|
|
self.assertEqual(len(alignments.targets[22]), 51304566)
|
|
self.assertEqual(alignments.targets[23].id, "chr21")
|
|
self.assertEqual(len(alignments.targets[23]), 48129895)
|
|
self.assertEqual(alignments.targets[24].id, "chrM")
|
|
self.assertEqual(len(alignments.targets[24]), 16571)
|
|
self.assertEqual(len(alignments.metadata["PG"]), 1)
|
|
self.assertEqual(
|
|
alignments.metadata["PG"][0],
|
|
{
|
|
"ID": "samtools",
|
|
"PN": "samtools",
|
|
"VN": "1.14",
|
|
"CL": "samtools view -h -t hg19.chrom.sizes -",
|
|
},
|
|
)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 16))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr4")
|
|
self.assertEqual(alignment.query.id, "hg18_dna")
|
|
self.assertEqual(len(alignment.target.seq), 191154276)
|
|
self.assertEqual(len(alignment.query.seq), 16)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[61646095, 61646111],
|
|
[ 0, 16]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr4 61646095 ???????????????? 61646111
|
|
0 |||||||||||||||| 16
|
|
hg18_dna 0 ???????????????? 16
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg18_dna 0 chr4 61646096 0 11H16M6H * 0 0 * * AS:i:16
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (16 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 16:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 16)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 33))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg18_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 33)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[10271783, 10271816],
|
|
[ 0, 33]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 10271783 ????????????????????????????????? 10271816
|
|
0 ||||||||||||||||||||||||||||||||| 33
|
|
hg18_dna 0 ????????????????????????????????? 33
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg18_dna 0 chr1 10271784 0 33M * 0 0 * * AS:i:33
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (33 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 33:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 33)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 17))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr2")
|
|
self.assertEqual(alignment.query.id, "hg18_dna")
|
|
self.assertEqual(len(alignment.target.seq), 243199373)
|
|
self.assertEqual(len(alignment.query.seq), 17)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[53575980, 53575997],
|
|
[ 17, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr2 53575980 ????????????????? 53575997
|
|
0 ||||||||||||||||| 17
|
|
hg18_dna 17 ????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg18_dna 16 chr2 53575981 0 8H17M8H * 0 0 * * AS:i:17
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (17 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 17:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 17)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 41))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr9")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 141213431)
|
|
self.assertEqual(len(alignment.query.seq), 41)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[85737865, 85737906],
|
|
[ 0, 41]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr9 85737865 ????????????????????????????????????????? 85737906
|
|
0 ||||||||||||||||||||||||||||||||||||||||| 41
|
|
hg19_dna 0 ????????????????????????????????????????? 41
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr9 85737866 0 9H41M * 0 0 * * AS:i:29
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 41:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 41)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 41))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr8")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 146364022)
|
|
self.assertEqual(len(alignment.query.seq), 41)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[95160479, 95160520],
|
|
[ 0, 41]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr8 95160479 ????????????????????????????????????????? 95160520
|
|
0 ||||||||||||||||||||||||||||||||||||||||| 41
|
|
hg19_dna 0 ????????????????????????????????????????? 41
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr8 95160480 0 8H41M1H * 0 0 * * AS:i:41
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 41:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 41)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 36))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr22")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 51304566)
|
|
self.assertEqual(len(alignment.query.seq), 36)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[42144400, 42144436],
|
|
[ 0, 36]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr22 42144400 ???????????????????????????????????? 42144436
|
|
0 |||||||||||||||||||||||||||||||||||| 36
|
|
hg19_dna 0 ???????????????????????????????????? 36
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr22 42144401 0 11H36M3H * 0 0 * * AS:i:24
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr2")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 243199373)
|
|
self.assertEqual(len(alignment.query.seq), 48)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[183925984, 183925990, 183925990, 183926028],
|
|
[ 0, 6, 10, 48]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr2 183925984 ??????----?????????????????????????????????????? 183926028
|
|
0 ||||||----|||||||||||||||||||||||||||||||||||||| 48
|
|
hg19_dna 0 ???????????????????????????????????????????????? 48
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr2 183925985 0 1H6M4I38M1H * 0 0 * * AS:i:27
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (44 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 44:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 4:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 4:
|
|
internal_insertions = 4:
|
|
open_internal_insertions = 1,
|
|
extend_internal_insertions = 3;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 4)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 4)
|
|
self.assertEqual(counts.insertions, 4)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 4)
|
|
self.assertEqual(counts.aligned, 44)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 170))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr19")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 59128983)
|
|
self.assertEqual(len(alignment.query.seq), 36)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[35483340, 35483365, 35483499, 35483510],
|
|
[ 0, 25, 25, 36]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr19 35483340 ????????????????????????????????????????????????????????????
|
|
0 |||||||||||||||||||||||||-----------------------------------
|
|
hg19_dna 0 ?????????????????????????-----------------------------------
|
|
|
|
chr19 35483400 ????????????????????????????????????????????????????????????
|
|
60 ------------------------------------------------------------
|
|
hg19_dna 25 ------------------------------------------------------------
|
|
|
|
chr19 35483460 ?????????????????????????????????????????????????? 35483510
|
|
120 ---------------------------------------||||||||||| 170
|
|
hg19_dna 25 ---------------------------------------??????????? 36
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr19 35483341 0 10H25M134D11M4H * 0 0 * * AS:i:0
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 134 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 134:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 134:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 134:
|
|
open_internal_deletions = 1,
|
|
extend_internal_deletions = 133;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 134)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 134)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 134)
|
|
self.assertEqual(counts.gaps, 134)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 39))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr18")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 78077248)
|
|
self.assertEqual(len(alignment.query.seq), 39)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[23891310, 23891349],
|
|
[ 0, 39]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr18 23891310 ??????????????????????????????????????? 23891349
|
|
0 ||||||||||||||||||||||||||||||||||||||| 39
|
|
hg19_dna 0 ??????????????????????????????????????? 39
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr18 23891311 0 10H39M1H * 0 0 * * AS:i:39
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 39:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 39)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 28))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr18")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 78077248)
|
|
self.assertEqual(len(alignment.query.seq), 28)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[43252217, 43252245],
|
|
[ 0, 28]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr18 43252217 ???????????????????????????? 43252245
|
|
0 |||||||||||||||||||||||||||| 28
|
|
hg19_dna 0 ???????????????????????????? 28
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr18 43252218 0 21H28M1H * 0 0 * * AS:i:24
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 28:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 28)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 51))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr13")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 115169878)
|
|
self.assertEqual(len(alignment.query.seq), 48)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[52759147, 52759157, 52759160, 52759198],
|
|
[ 0, 10, 10, 48]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr13 52759147 ??????????????????????????????????????????????????? 52759198
|
|
0 ||||||||||---|||||||||||||||||||||||||||||||||||||| 51
|
|
hg19_dna 0 ??????????---?????????????????????????????????????? 48
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr13 52759148 0 1H10M3D38M1H * 0 0 * * AS:i:30
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (48 aligned letters; 0 identities; 0 mismatches; 3 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 48:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 3:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 3:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 3:
|
|
open_internal_deletions = 1,
|
|
extend_internal_deletions = 2;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 3)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 3)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 3)
|
|
self.assertEqual(counts.gaps, 3)
|
|
self.assertEqual(counts.aligned, 48)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 50))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[1207056, 1207106],
|
|
[ 0, 50]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 1207056 ?????????????????????????????????????????????????? 1207106
|
|
0 |||||||||||||||||||||||||||||||||||||||||||||||||| 50
|
|
hg19_dna 0 ?????????????????????????????????????????????????? 50
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr1 1207057 0 50M * 0 0 * * AS:i:50
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (50 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 50:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 50)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 34))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 34)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[61700837, 61700871],
|
|
[ 0, 34]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 61700837 ?????????????????????????????????? 61700871
|
|
0 |||||||||||||||||||||||||||||||||| 34
|
|
hg19_dna 0 ?????????????????????????????????? 34
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr1 61700838 0 1H34M15H * 0 0 * * AS:i:22
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 34:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 34)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 38))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr4")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 191154276)
|
|
self.assertEqual(len(alignment.query.seq), 38)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[37558157, 37558173, 37558173, 37558191],
|
|
[ 38, 22, 18, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr4 37558157 ????????????????----?????????????????? 37558191
|
|
0 ||||||||||||||||----|||||||||||||||||| 38
|
|
hg19_dna 38 ?????????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr4 37558158 0 1H16M4I18M11H * 0 0 * * AS:i:15
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 34:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 4:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 4:
|
|
internal_insertions = 4:
|
|
open_internal_insertions = 1,
|
|
extend_internal_insertions = 3;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 4)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 4)
|
|
self.assertEqual(counts.insertions, 4)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 4)
|
|
self.assertEqual(counts.aligned, 34)
|
|
alignment = next(alignments)
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr22")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 51304566)
|
|
self.assertEqual(len(alignment.query.seq), 37)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[48997405, 48997442],
|
|
[ 37, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr22 48997405 ????????????????????????????????????? 48997442
|
|
0 ||||||||||||||||||||||||||||||||||||| 37
|
|
hg19_dna 37 ????????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr22 48997406 0 1H37M12H * 0 0 * * AS:i:29
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (37 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 37:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 37)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 36))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr2")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 243199373)
|
|
self.assertEqual(len(alignment.query.seq), 36)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[120641740, 120641776],
|
|
[ 36, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr2 120641740 ???????????????????????????????????? 120641776
|
|
0 |||||||||||||||||||||||||||||||||||| 36
|
|
hg19_dna 36 ???????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr2 120641741 0 1H36M13H * 0 0 * * AS:i:32
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 39))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr19")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 59128983)
|
|
self.assertEqual(len(alignment.query.seq), 39)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[54017130, 54017169],
|
|
[ 39, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr19 54017130 ??????????????????????????????????????? 54017169
|
|
0 ||||||||||||||||||||||||||||||||||||||| 39
|
|
hg19_dna 39 ??????????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr19 54017131 0 1H39M10H * 0 0 * * AS:i:39
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 39:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 39)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 39))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr19")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 59128983)
|
|
self.assertEqual(len(alignment.query.seq), 39)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[553742, 553781],
|
|
[ 39, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr19 553742 ??????????????????????????????????????? 553781
|
|
0 ||||||||||||||||||||||||||||||||||||||| 39
|
|
hg19_dna 39 ??????????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr19 553743 0 1H39M10H * 0 0 * * AS:i:27
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 39:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 39)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 36))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr10")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 135534747)
|
|
self.assertEqual(len(alignment.query.seq), 36)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[99388555, 99388591],
|
|
[ 36, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr10 99388555 ???????????????????????????????????? 99388591
|
|
0 |||||||||||||||||||||||||||||||||||| 36
|
|
hg19_dna 36 ???????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr10 99388556 0 1H36M13H * 0 0 * * AS:i:24
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 25))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr10")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 135534747)
|
|
self.assertEqual(len(alignment.query.seq), 25)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[112178171, 112178196],
|
|
[ 25, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr10 112178171 ????????????????????????? 112178196
|
|
0 ||||||||||||||||||||||||| 25
|
|
hg19_dna 25 ????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr10 112178172 0 15H25M10H * 0 0 * * AS:i:21
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (25 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 25:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 25)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 36))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 36)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[39368490, 39368526],
|
|
[ 36, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 39368490 ???????????????????????????????????? 39368526
|
|
0 |||||||||||||||||||||||||||||||||||| 36
|
|
hg19_dna 36 ???????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr1 39368491 0 1H36M13H * 0 0 * * AS:i:32
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 34))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 34)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[220325687, 220325721],
|
|
[ 34, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 220325687 ?????????????????????????????????? 220325721
|
|
0 |||||||||||||||||||||||||||||||||| 34
|
|
hg19_dna 34 ?????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr1 220325688 0 3H34M13H * 0 0 * * AS:i:30
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 34:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 34)
|
|
self.assertRaises(StopIteration, next, alignments)
|
|
|
|
def test_reading_psl_34_001(self):
    """Read Blat/psl_34_001.sam as SAM and validate the parsed alignments."""
    parsed = Align.parse("Blat/psl_34_001.sam", "sam")
    self.check_alignments_psl_34_001(parsed)
|
|
|
|
def test_writing_psl_34_001(self):
    """Round-trip the alignments in Blat/psl_34_001.sam through the SAM writer.

    The file is parsed, written to an in-memory text stream, and the
    written text is parsed again and passed to the same checks used for
    the file itself. ``Align.write`` is expected to report 22 alignments.
    """
    path = "Blat/psl_34_001.sam"
    alignments = Align.parse(path, "sam")
    # Use a context manager so the buffer is closed even when one of the
    # assertions below fails.
    with StringIO() as stream:
        n = Align.write(alignments, stream, "sam")
        self.assertEqual(n, 22)
        stream.seek(0)  # rewind so the parser reads from the beginning
        alignments = Align.parse(stream, "sam")
        self.check_alignments_psl_34_001(alignments)
|
|
|
|
def check_alignments_psl_34_003(self, alignments):
|
|
"""Check the alignments for psl_34_003/sam."""
|
|
# The only metadata key expected is "PG".
self.assertEqual(list(alignments.metadata), ["PG"])
|
|
# 25 targets are expected; the id and length of each are checked in order.
self.assertEqual(len(alignments.targets), 25)
|
|
self.assertEqual(alignments.targets[0].id, "chr1")
|
|
self.assertEqual(len(alignments.targets[0]), 249250621)
|
|
self.assertEqual(alignments.targets[1].id, "chr2")
|
|
self.assertEqual(len(alignments.targets[1]), 243199373)
|
|
self.assertEqual(alignments.targets[2].id, "chr3")
|
|
self.assertEqual(len(alignments.targets[2]), 198022430)
|
|
self.assertEqual(alignments.targets[3].id, "chr4")
|
|
self.assertEqual(len(alignments.targets[3]), 191154276)
|
|
self.assertEqual(alignments.targets[4].id, "chr5")
|
|
self.assertEqual(len(alignments.targets[4]), 180915260)
|
|
self.assertEqual(alignments.targets[5].id, "chr6")
|
|
self.assertEqual(len(alignments.targets[5]), 171115067)
|
|
self.assertEqual(alignments.targets[6].id, "chr7")
|
|
self.assertEqual(len(alignments.targets[6]), 159138663)
|
|
self.assertEqual(alignments.targets[7].id, "chrX")
|
|
self.assertEqual(len(alignments.targets[7]), 155270560)
|
|
self.assertEqual(alignments.targets[8].id, "chr8")
|
|
self.assertEqual(len(alignments.targets[8]), 146364022)
|
|
self.assertEqual(alignments.targets[9].id, "chr9")
|
|
self.assertEqual(len(alignments.targets[9]), 141213431)
|
|
self.assertEqual(alignments.targets[10].id, "chr10")
|
|
self.assertEqual(len(alignments.targets[10]), 135534747)
|
|
self.assertEqual(alignments.targets[11].id, "chr11")
|
|
self.assertEqual(len(alignments.targets[11]), 135006516)
|
|
self.assertEqual(alignments.targets[12].id, "chr12")
|
|
self.assertEqual(len(alignments.targets[12]), 133851895)
|
|
self.assertEqual(alignments.targets[13].id, "chr13")
|
|
self.assertEqual(len(alignments.targets[13]), 115169878)
|
|
self.assertEqual(alignments.targets[14].id, "chr14")
|
|
self.assertEqual(len(alignments.targets[14]), 107349540)
|
|
self.assertEqual(alignments.targets[15].id, "chr15")
|
|
self.assertEqual(len(alignments.targets[15]), 102531392)
|
|
self.assertEqual(alignments.targets[16].id, "chr16")
|
|
self.assertEqual(len(alignments.targets[16]), 90354753)
|
|
self.assertEqual(alignments.targets[17].id, "chr17")
|
|
self.assertEqual(len(alignments.targets[17]), 81195210)
|
|
self.assertEqual(alignments.targets[18].id, "chr18")
|
|
self.assertEqual(len(alignments.targets[18]), 78077248)
|
|
self.assertEqual(alignments.targets[19].id, "chr20")
|
|
self.assertEqual(len(alignments.targets[19]), 63025520)
|
|
self.assertEqual(alignments.targets[20].id, "chrY")
|
|
self.assertEqual(len(alignments.targets[20]), 59373566)
|
|
self.assertEqual(alignments.targets[21].id, "chr19")
|
|
self.assertEqual(len(alignments.targets[21]), 59128983)
|
|
self.assertEqual(alignments.targets[22].id, "chr22")
|
|
self.assertEqual(len(alignments.targets[22]), 51304566)
|
|
self.assertEqual(alignments.targets[23].id, "chr21")
|
|
self.assertEqual(len(alignments.targets[23]), 48129895)
|
|
self.assertEqual(alignments.targets[24].id, "chrM")
|
|
self.assertEqual(len(alignments.targets[24]), 16571)
|
|
# NOTE(review): repeats the target-count check made at the top of this method.
self.assertEqual(len(alignments.targets), 25)
|
|
# Exactly one "PG" entry is expected, with the fields below.
self.assertEqual(len(alignments.metadata["PG"]), 1)
|
|
self.assertEqual(
|
|
alignments.metadata["PG"][0],
|
|
{
|
|
"ID": "samtools",
|
|
"PN": "samtools",
|
|
"VN": "1.14",
|
|
"CL": "samtools view -h -t hg19.chrom.sizes -",
|
|
},
|
|
)
|
|
# First alignment: hg18_dna against chr4; both coordinate rows increase.
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 16))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr4")
|
|
self.assertEqual(alignment.query.id, "hg18_dna")
|
|
self.assertEqual(len(alignment.target.seq), 191154276)
|
|
self.assertEqual(len(alignment.query.seq), 16)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[61646095, 61646111],
|
|
[ 0, 16]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr4 61646095 ???????????????? 61646111
|
|
0 |||||||||||||||| 16
|
|
hg18_dna 0 ???????????????? 16
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg18_dna 0 chr4 61646096 0 11H16M6H * 0 0 * * AS:i:16
|
|
""",
|
|
)
|
|
# NOTE(review): identities and mismatches are expected to be 0 although 16
# letters are aligned - presumably because the sequence contents are unknown
# ("?" in the printout above); confirm.
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (16 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 16:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 16)
|
|
# Second alignment: hg18_dna against chr1; both coordinate rows increase.
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 33))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg18_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 33)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[10271783, 10271816],
|
|
[ 0, 33]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 10271783 ????????????????????????????????? 10271816
|
|
0 ||||||||||||||||||||||||||||||||| 33
|
|
hg18_dna 0 ????????????????????????????????? 33
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg18_dna 0 chr1 10271784 0 33M * 0 0 * * AS:i:33
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (33 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 33:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 33)
|
|
# Third alignment: hg18_dna against chr2; the query row runs from 17 down to 0.
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 17))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr2")
|
|
self.assertEqual(alignment.query.id, "hg18_dna")
|
|
self.assertEqual(len(alignment.target.seq), 243199373)
|
|
self.assertEqual(len(alignment.query.seq), 17)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[53575980, 53575997],
|
|
[ 17, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr2 53575980 ????????????????? 53575997
|
|
0 ||||||||||||||||| 17
|
|
hg18_dna 17 ????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg18_dna 16 chr2 53575981 0 8H17M8H * 0 0 * * AS:i:17
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (17 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 17:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 17)
|
|
# The iterator must be exhausted after the third alignment.
self.assertRaises(StopIteration, next, alignments)
|
|
|
|
def test_reading_psl_34_003(self):
    """Parse Blat/psl_34_003.sam in SAM format and verify its alignments."""
    # All assertions live in check_alignments_psl_34_003, which the
    # corresponding writing test also calls on its re-parsed alignments.
    self.check_alignments_psl_34_003(Align.parse("Blat/psl_34_003.sam", "sam"))
|
|
|
|
def test_writing_psl_34_003(self):
    """Round-trip the alignments in psl_34_003.sam through the SAM writer.

    The file is parsed, written to an in-memory stream in SAM format, and
    parsed again from that stream. The re-parsed alignments must pass the
    same checks as alignments read directly from the file.
    """
    path = "Blat/psl_34_003.sam"
    alignments = Align.parse(path, "sam")
    # Use a context manager so the stream is closed even if an assertion
    # below fails; a trailing stream.close() would be skipped in that case.
    with StringIO() as stream:
        n = Align.write(alignments, stream, "sam")
        # The writer's return value must be 3 for this file.
        self.assertEqual(n, 3)
        # Rewind so the parser reads the data that was just written.
        stream.seek(0)
        alignments = Align.parse(stream, "sam")
        # The checker consumes the iterator, so it must run while the
        # stream is still open.
        self.check_alignments_psl_34_003(alignments)
|
|
|
|
def check_alignments_psl_34_004(self, alignments):
|
|
"""Check the alignments for psl_34_004/sam."""
|
|
self.assertEqual(list(alignments.metadata), ["PG"])
|
|
self.assertEqual(len(alignments.targets), 25)
|
|
self.assertEqual(alignments.targets[0].id, "chr1")
|
|
self.assertEqual(len(alignments.targets[0]), 249250621)
|
|
self.assertEqual(alignments.targets[1].id, "chr2")
|
|
self.assertEqual(len(alignments.targets[1]), 243199373)
|
|
self.assertEqual(alignments.targets[2].id, "chr3")
|
|
self.assertEqual(len(alignments.targets[2]), 198022430)
|
|
self.assertEqual(alignments.targets[3].id, "chr4")
|
|
self.assertEqual(len(alignments.targets[3]), 191154276)
|
|
self.assertEqual(alignments.targets[4].id, "chr5")
|
|
self.assertEqual(len(alignments.targets[4]), 180915260)
|
|
self.assertEqual(alignments.targets[5].id, "chr6")
|
|
self.assertEqual(len(alignments.targets[5]), 171115067)
|
|
self.assertEqual(alignments.targets[6].id, "chr7")
|
|
self.assertEqual(len(alignments.targets[6]), 159138663)
|
|
self.assertEqual(alignments.targets[7].id, "chrX")
|
|
self.assertEqual(len(alignments.targets[7]), 155270560)
|
|
self.assertEqual(alignments.targets[8].id, "chr8")
|
|
self.assertEqual(len(alignments.targets[8]), 146364022)
|
|
self.assertEqual(alignments.targets[9].id, "chr9")
|
|
self.assertEqual(len(alignments.targets[9]), 141213431)
|
|
self.assertEqual(alignments.targets[10].id, "chr10")
|
|
self.assertEqual(len(alignments.targets[10]), 135534747)
|
|
self.assertEqual(alignments.targets[11].id, "chr11")
|
|
self.assertEqual(len(alignments.targets[11]), 135006516)
|
|
self.assertEqual(alignments.targets[12].id, "chr12")
|
|
self.assertEqual(len(alignments.targets[12]), 133851895)
|
|
self.assertEqual(alignments.targets[13].id, "chr13")
|
|
self.assertEqual(len(alignments.targets[13]), 115169878)
|
|
self.assertEqual(alignments.targets[14].id, "chr14")
|
|
self.assertEqual(len(alignments.targets[14]), 107349540)
|
|
self.assertEqual(alignments.targets[15].id, "chr15")
|
|
self.assertEqual(len(alignments.targets[15]), 102531392)
|
|
self.assertEqual(alignments.targets[16].id, "chr16")
|
|
self.assertEqual(len(alignments.targets[16]), 90354753)
|
|
self.assertEqual(alignments.targets[17].id, "chr17")
|
|
self.assertEqual(len(alignments.targets[17]), 81195210)
|
|
self.assertEqual(alignments.targets[18].id, "chr18")
|
|
self.assertEqual(len(alignments.targets[18]), 78077248)
|
|
self.assertEqual(alignments.targets[19].id, "chr20")
|
|
self.assertEqual(len(alignments.targets[19]), 63025520)
|
|
self.assertEqual(alignments.targets[20].id, "chrY")
|
|
self.assertEqual(len(alignments.targets[20]), 59373566)
|
|
self.assertEqual(alignments.targets[21].id, "chr19")
|
|
self.assertEqual(len(alignments.targets[21]), 59128983)
|
|
self.assertEqual(alignments.targets[22].id, "chr22")
|
|
self.assertEqual(len(alignments.targets[22]), 51304566)
|
|
self.assertEqual(alignments.targets[23].id, "chr21")
|
|
self.assertEqual(len(alignments.targets[23]), 48129895)
|
|
self.assertEqual(alignments.targets[24].id, "chrM")
|
|
self.assertEqual(len(alignments.targets[24]), 16571)
|
|
self.assertEqual(len(alignments.targets), 25)
|
|
self.assertEqual(len(alignments.metadata["PG"]), 1)
|
|
self.assertEqual(
|
|
alignments.metadata["PG"][0],
|
|
{
|
|
"ID": "samtools",
|
|
"PN": "samtools",
|
|
"VN": "1.14",
|
|
"CL": "samtools view -h -t hg19.chrom.sizes -",
|
|
},
|
|
)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 41))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr9")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 141213431)
|
|
self.assertEqual(len(alignment.query.seq), 41)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[85737865, 85737906],
|
|
[ 0, 41]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr9 85737865 ????????????????????????????????????????? 85737906
|
|
0 ||||||||||||||||||||||||||||||||||||||||| 41
|
|
hg19_dna 0 ????????????????????????????????????????? 41
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr9 85737866 0 9H41M * 0 0 * * AS:i:29
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 41:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 41)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 41))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr8")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 146364022)
|
|
self.assertEqual(len(alignment.query.seq), 41)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[95160479, 95160520],
|
|
[ 0, 41]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr8 95160479 ????????????????????????????????????????? 95160520
|
|
0 ||||||||||||||||||||||||||||||||||||||||| 41
|
|
hg19_dna 0 ????????????????????????????????????????? 41
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr8 95160480 0 8H41M1H * 0 0 * * AS:i:41
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 41:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 41)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 36))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr22")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 51304566)
|
|
self.assertEqual(len(alignment.query.seq), 36)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[42144400, 42144436],
|
|
[ 0, 36]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr22 42144400 ???????????????????????????????????? 42144436
|
|
0 |||||||||||||||||||||||||||||||||||| 36
|
|
hg19_dna 0 ???????????????????????????????????? 36
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr22 42144401 0 11H36M3H * 0 0 * * AS:i:24
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 48))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr2")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 243199373)
|
|
self.assertEqual(len(alignment.query.seq), 48)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[183925984, 183925990, 183925990, 183926028],
|
|
[ 0, 6, 10, 48]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr2 183925984 ??????----?????????????????????????????????????? 183926028
|
|
0 ||||||----|||||||||||||||||||||||||||||||||||||| 48
|
|
hg19_dna 0 ???????????????????????????????????????????????? 48
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr2 183925985 0 1H6M4I38M1H * 0 0 * * AS:i:27
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (44 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 44:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 4:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 4:
|
|
internal_insertions = 4:
|
|
open_internal_insertions = 1,
|
|
extend_internal_insertions = 3;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 4)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 4)
|
|
self.assertEqual(counts.insertions, 4)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 4)
|
|
self.assertEqual(counts.aligned, 44)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 170))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr19")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 59128983)
|
|
self.assertEqual(len(alignment.query.seq), 36)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[35483340, 35483365, 35483499, 35483510],
|
|
[ 0, 25, 25, 36]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr19 35483340 ????????????????????????????????????????????????????????????
|
|
0 |||||||||||||||||||||||||-----------------------------------
|
|
hg19_dna 0 ?????????????????????????-----------------------------------
|
|
|
|
chr19 35483400 ????????????????????????????????????????????????????????????
|
|
60 ------------------------------------------------------------
|
|
hg19_dna 25 ------------------------------------------------------------
|
|
|
|
chr19 35483460 ?????????????????????????????????????????????????? 35483510
|
|
120 ---------------------------------------||||||||||| 170
|
|
hg19_dna 25 ---------------------------------------??????????? 36
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr19 35483341 0 10H25M134D11M4H * 0 0 * * AS:i:0
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 134 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 134:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 134:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 134:
|
|
open_internal_deletions = 1,
|
|
extend_internal_deletions = 133;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 134)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 134)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 134)
|
|
self.assertEqual(counts.gaps, 134)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 39))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr18")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 78077248)
|
|
self.assertEqual(len(alignment.query.seq), 39)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[23891310, 23891349],
|
|
[ 0, 39]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr18 23891310 ??????????????????????????????????????? 23891349
|
|
0 ||||||||||||||||||||||||||||||||||||||| 39
|
|
hg19_dna 0 ??????????????????????????????????????? 39
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr18 23891311 0 10H39M1H * 0 0 * * AS:i:39
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 39:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 39)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 28))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr18")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 78077248)
|
|
self.assertEqual(len(alignment.query.seq), 28)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[43252217, 43252245],
|
|
[ 0, 28]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr18 43252217 ???????????????????????????? 43252245
|
|
0 |||||||||||||||||||||||||||| 28
|
|
hg19_dna 0 ???????????????????????????? 28
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr18 43252218 0 21H28M1H * 0 0 * * AS:i:24
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 28:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 28)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 51))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr13")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 115169878)
|
|
self.assertEqual(len(alignment.query.seq), 48)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[52759147, 52759157, 52759160, 52759198],
|
|
[ 0, 10, 10, 48]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr13 52759147 ??????????????????????????????????????????????????? 52759198
|
|
0 ||||||||||---|||||||||||||||||||||||||||||||||||||| 51
|
|
hg19_dna 0 ??????????---?????????????????????????????????????? 48
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr13 52759148 0 1H10M3D38M1H * 0 0 * * AS:i:30
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (48 aligned letters; 0 identities; 0 mismatches; 3 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 48:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 3:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 3:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 3:
|
|
open_internal_deletions = 1,
|
|
extend_internal_deletions = 2;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 3)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 3)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 3)
|
|
self.assertEqual(counts.gaps, 3)
|
|
self.assertEqual(counts.aligned, 48)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 50))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[1207056, 1207106],
|
|
[ 0, 50]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 1207056 ?????????????????????????????????????????????????? 1207106
|
|
0 |||||||||||||||||||||||||||||||||||||||||||||||||| 50
|
|
hg19_dna 0 ?????????????????????????????????????????????????? 50
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr1 1207057 0 50M * 0 0 * * AS:i:50
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (50 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 50:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 50)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 34))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 34)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[61700837, 61700871],
|
|
[ 0, 34]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 61700837 ?????????????????????????????????? 61700871
|
|
0 |||||||||||||||||||||||||||||||||| 34
|
|
hg19_dna 0 ?????????????????????????????????? 34
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr1 61700838 0 1H34M15H * 0 0 * * AS:i:22
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 34:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 34)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 38))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr4")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 191154276)
|
|
self.assertEqual(len(alignment.query.seq), 38)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[37558157, 37558173, 37558173, 37558191],
|
|
[ 38, 22, 18, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr4 37558157 ????????????????----?????????????????? 37558191
|
|
0 ||||||||||||||||----|||||||||||||||||| 38
|
|
hg19_dna 38 ?????????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr4 37558158 0 1H16M4I18M11H * 0 0 * * AS:i:15
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 34:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 4:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 4:
|
|
internal_insertions = 4:
|
|
open_internal_insertions = 1,
|
|
extend_internal_insertions = 3;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 4)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 4)
|
|
self.assertEqual(counts.insertions, 4)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 4)
|
|
self.assertEqual(counts.aligned, 34)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 37))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr22")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 51304566)
|
|
self.assertEqual(len(alignment.query.seq), 37)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[48997405, 48997442],
|
|
[ 37, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr22 48997405 ????????????????????????????????????? 48997442
|
|
0 ||||||||||||||||||||||||||||||||||||| 37
|
|
hg19_dna 37 ????????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr22 48997406 0 1H37M12H * 0 0 * * AS:i:29
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (37 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 37:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 37)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 36))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr2")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 243199373)
|
|
self.assertEqual(len(alignment.query.seq), 36)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[120641740, 120641776],
|
|
[ 36, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr2 120641740 ???????????????????????????????????? 120641776
|
|
0 |||||||||||||||||||||||||||||||||||| 36
|
|
hg19_dna 36 ???????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr2 120641741 0 1H36M13H * 0 0 * * AS:i:32
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 39))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr19")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 59128983)
|
|
self.assertEqual(len(alignment.query.seq), 39)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[54017130, 54017169],
|
|
[ 39, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr19 54017130 ??????????????????????????????????????? 54017169
|
|
0 ||||||||||||||||||||||||||||||||||||||| 39
|
|
hg19_dna 39 ??????????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr19 54017131 0 1H39M10H * 0 0 * * AS:i:39
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 39:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 39)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 39))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr19")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 59128983)
|
|
self.assertEqual(len(alignment.query.seq), 39)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[553742, 553781],
|
|
[ 39, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr19 553742 ??????????????????????????????????????? 553781
|
|
0 ||||||||||||||||||||||||||||||||||||||| 39
|
|
hg19_dna 39 ??????????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr19 553743 0 1H39M10H * 0 0 * * AS:i:27
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 39:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 39)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 36))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr10")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 135534747)
|
|
self.assertEqual(len(alignment.query.seq), 36)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[99388555, 99388591],
|
|
[ 36, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr10 99388555 ???????????????????????????????????? 99388591
|
|
0 |||||||||||||||||||||||||||||||||||| 36
|
|
hg19_dna 36 ???????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr10 99388556 0 1H36M13H * 0 0 * * AS:i:24
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 25))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr10")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 135534747)
|
|
self.assertEqual(len(alignment.query.seq), 25)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[112178171, 112178196],
|
|
[ 25, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr10 112178171 ????????????????????????? 112178196
|
|
0 ||||||||||||||||||||||||| 25
|
|
hg19_dna 25 ????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr10 112178172 0 15H25M10H * 0 0 * * AS:i:21
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (25 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 25:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 25)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 36))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 36)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[39368490, 39368526],
|
|
[ 36, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 39368490 ???????????????????????????????????? 39368526
|
|
0 |||||||||||||||||||||||||||||||||||| 36
|
|
hg19_dna 36 ???????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr1 39368491 0 1H36M13H * 0 0 * * AS:i:32
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 34))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 34)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[220325687, 220325721],
|
|
[ 34, 0]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 220325687 ?????????????????????????????????? 220325721
|
|
0 |||||||||||||||||||||||||||||||||| 34
|
|
hg19_dna 34 ?????????????????????????????????? 0
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr1 220325688 0 3H34M13H * 0 0 * * AS:i:30
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 34:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 34)
|
|
self.assertRaises(StopIteration, next, alignments)
|
|
|
|
def test_reading_psl_34_004(self):
    """Parse Blat/psl_34_004.sam in SAM format and check its alignments.

    All per-alignment assertions are delegated to
    check_alignments_psl_34_004.
    """
    self.check_alignments_psl_34_004(Align.parse("Blat/psl_34_004.sam", "sam"))
|
|
|
|
def test_writing_psl_34_004(self):
    """Test writing the alignments in psl_34_004.sam.

    The file is parsed, written back out in SAM format to an in-memory
    text buffer, and the buffer is parsed again.  The re-read alignments
    must pass the same checks (check_alignments_psl_34_004) as the ones
    read directly from the file.
    """
    path = "Blat/psl_34_004.sam"
    alignments = Align.parse(path, "sam")
    # Use a context manager so the buffer is closed even when one of the
    # assertions below fails; a trailing close() call would be skipped.
    with StringIO() as stream:
        n = Align.write(alignments, stream, "sam")
        # The value returned by Align.write is expected to be 19 for this file.
        self.assertEqual(n, 19)
        # Rewind so that parsing starts from the beginning of the buffer.
        stream.seek(0)
        alignments = Align.parse(stream, "sam")
        self.check_alignments_psl_34_004(alignments)
|
|
|
|
def check_alignments_psl_34_005(self, alignments):
|
|
"""Check the alignments for psl_34_005.sam."""
|
|
self.assertEqual(list(alignments.metadata), ["PG"])
|
|
self.assertEqual(len(alignments.targets), 25)
|
|
self.assertEqual(alignments.targets[0].id, "chr1")
|
|
self.assertEqual(len(alignments.targets[0]), 249250621)
|
|
self.assertEqual(alignments.targets[1].id, "chr2")
|
|
self.assertEqual(len(alignments.targets[1]), 243199373)
|
|
self.assertEqual(alignments.targets[2].id, "chr3")
|
|
self.assertEqual(len(alignments.targets[2]), 198022430)
|
|
self.assertEqual(alignments.targets[3].id, "chr4")
|
|
self.assertEqual(len(alignments.targets[3]), 191154276)
|
|
self.assertEqual(alignments.targets[4].id, "chr5")
|
|
self.assertEqual(len(alignments.targets[4]), 180915260)
|
|
self.assertEqual(alignments.targets[5].id, "chr6")
|
|
self.assertEqual(len(alignments.targets[5]), 171115067)
|
|
self.assertEqual(alignments.targets[6].id, "chr7")
|
|
self.assertEqual(len(alignments.targets[6]), 159138663)
|
|
self.assertEqual(alignments.targets[7].id, "chrX")
|
|
self.assertEqual(len(alignments.targets[7]), 155270560)
|
|
self.assertEqual(alignments.targets[8].id, "chr8")
|
|
self.assertEqual(len(alignments.targets[8]), 146364022)
|
|
self.assertEqual(alignments.targets[9].id, "chr9")
|
|
self.assertEqual(len(alignments.targets[9]), 141213431)
|
|
self.assertEqual(alignments.targets[10].id, "chr10")
|
|
self.assertEqual(len(alignments.targets[10]), 135534747)
|
|
self.assertEqual(alignments.targets[11].id, "chr11")
|
|
self.assertEqual(len(alignments.targets[11]), 135006516)
|
|
self.assertEqual(alignments.targets[12].id, "chr12")
|
|
self.assertEqual(len(alignments.targets[12]), 133851895)
|
|
self.assertEqual(alignments.targets[13].id, "chr13")
|
|
self.assertEqual(len(alignments.targets[13]), 115169878)
|
|
self.assertEqual(alignments.targets[14].id, "chr14")
|
|
self.assertEqual(len(alignments.targets[14]), 107349540)
|
|
self.assertEqual(alignments.targets[15].id, "chr15")
|
|
self.assertEqual(len(alignments.targets[15]), 102531392)
|
|
self.assertEqual(alignments.targets[16].id, "chr16")
|
|
self.assertEqual(len(alignments.targets[16]), 90354753)
|
|
self.assertEqual(alignments.targets[17].id, "chr17")
|
|
self.assertEqual(len(alignments.targets[17]), 81195210)
|
|
self.assertEqual(alignments.targets[18].id, "chr18")
|
|
self.assertEqual(len(alignments.targets[18]), 78077248)
|
|
self.assertEqual(alignments.targets[19].id, "chr20")
|
|
self.assertEqual(len(alignments.targets[19]), 63025520)
|
|
self.assertEqual(alignments.targets[20].id, "chrY")
|
|
self.assertEqual(len(alignments.targets[20]), 59373566)
|
|
self.assertEqual(alignments.targets[21].id, "chr19")
|
|
self.assertEqual(len(alignments.targets[21]), 59128983)
|
|
self.assertEqual(alignments.targets[22].id, "chr22")
|
|
self.assertEqual(len(alignments.targets[22]), 51304566)
|
|
self.assertEqual(alignments.targets[23].id, "chr21")
|
|
self.assertEqual(len(alignments.targets[23]), 48129895)
|
|
self.assertEqual(alignments.targets[24].id, "chrM")
|
|
self.assertEqual(len(alignments.targets[24]), 16571)
|
|
self.assertEqual(len(alignments.targets), 25)
|
|
self.assertEqual(len(alignments.metadata["PG"]), 1)
|
|
self.assertEqual(
|
|
alignments.metadata["PG"][0],
|
|
{
|
|
"ID": "samtools",
|
|
"PN": "samtools",
|
|
"VN": "1.14",
|
|
"CL": "samtools view -h -t hg19.chrom.sizes -",
|
|
},
|
|
)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 16))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr4")
|
|
self.assertEqual(alignment.query.id, "hg18_dna")
|
|
self.assertEqual(len(alignment.target.seq), 191154276)
|
|
self.assertEqual(len(alignment.query.seq), 33)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[61646095, 61646111],
|
|
[ 11, 27]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr4 61646095 ???????????????? 61646111
|
|
0 |||||||||||||||| 16
|
|
hg18_dna 11 ???????????????? 27
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg18_dna 0 chr4 61646096 0 11S16M6S * 0 0 * * AS:i:16
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (16 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 16:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 16)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 33))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg18_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 33)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[10271783, 10271816],
|
|
[ 0, 33]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 10271783 ????????????????????????????????? 10271816
|
|
0 ||||||||||||||||||||||||||||||||| 33
|
|
hg18_dna 0 ????????????????????????????????? 33
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg18_dna 0 chr1 10271784 0 33M * 0 0 * * AS:i:33
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (33 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 33:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 33)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 17))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr2")
|
|
self.assertEqual(alignment.query.id, "hg18_dna")
|
|
self.assertEqual(len(alignment.target.seq), 243199373)
|
|
self.assertEqual(len(alignment.query.seq), 33)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[53575980, 53575997],
|
|
[ 25, 8]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr2 53575980 ????????????????? 53575997
|
|
0 ||||||||||||||||| 17
|
|
hg18_dna 25 ????????????????? 8
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg18_dna 16 chr2 53575981 0 8S17M8S * 0 0 * * AS:i:17
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (17 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 17:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 17)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 41))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr9")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 141213431)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[85737865, 85737906],
|
|
[ 9, 50]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr9 85737865 ????????????????????????????????????????? 85737906
|
|
0 ||||||||||||||||||||||||||||||||||||||||| 41
|
|
hg19_dna 9 ????????????????????????????????????????? 50
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr9 85737866 0 9S41M * 0 0 * * AS:i:29
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 41:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 41)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 41))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr8")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 146364022)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[95160479, 95160520],
|
|
[ 8, 49]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr8 95160479 ????????????????????????????????????????? 95160520
|
|
0 ||||||||||||||||||||||||||||||||||||||||| 41
|
|
hg19_dna 8 ????????????????????????????????????????? 49
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr8 95160480 0 8S41M1S * 0 0 * * AS:i:41
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 41:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 41)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 36))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr22")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 51304566)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[42144400, 42144436],
|
|
[ 11, 47]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr22 42144400 ???????????????????????????????????? 42144436
|
|
0 |||||||||||||||||||||||||||||||||||| 36
|
|
hg19_dna 11 ???????????????????????????????????? 47
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr22 42144401 0 11S36M3S * 0 0 * * AS:i:24
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 48))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr2")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 243199373)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[183925984, 183925990, 183925990, 183926028],
|
|
[ 1, 7, 11, 49]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr2 183925984 ??????----?????????????????????????????????????? 183926028
|
|
0 ||||||----|||||||||||||||||||||||||||||||||||||| 48
|
|
hg19_dna 1 ???????????????????????????????????????????????? 49
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr2 183925985 0 1S6M4I38M1S * 0 0 * * AS:i:27
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (44 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 44:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 4:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 4:
|
|
internal_insertions = 4:
|
|
open_internal_insertions = 1,
|
|
extend_internal_insertions = 3;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 4)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 4)
|
|
self.assertEqual(counts.insertions, 4)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 4)
|
|
self.assertEqual(counts.aligned, 44)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 170))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr19")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 59128983)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[35483340, 35483365, 35483499, 35483510],
|
|
[ 10, 35, 35, 46]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr19 35483340 ????????????????????????????????????????????????????????????
|
|
0 |||||||||||||||||||||||||-----------------------------------
|
|
hg19_dna 10 ?????????????????????????-----------------------------------
|
|
|
|
chr19 35483400 ????????????????????????????????????????????????????????????
|
|
60 ------------------------------------------------------------
|
|
hg19_dna 35 ------------------------------------------------------------
|
|
|
|
chr19 35483460 ?????????????????????????????????????????????????? 35483510
|
|
120 ---------------------------------------||||||||||| 170
|
|
hg19_dna 35 ---------------------------------------??????????? 46
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr19 35483341 0 10S25M134D11M4S * 0 0 * * AS:i:0
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 134 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 134:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 134:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 134:
|
|
open_internal_deletions = 1,
|
|
extend_internal_deletions = 133;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 134)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 134)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 134)
|
|
self.assertEqual(counts.gaps, 134)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 39))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr18")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 78077248)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[23891310, 23891349],
|
|
[ 10, 49]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr18 23891310 ??????????????????????????????????????? 23891349
|
|
0 ||||||||||||||||||||||||||||||||||||||| 39
|
|
hg19_dna 10 ??????????????????????????????????????? 49
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr18 23891311 0 10S39M1S * 0 0 * * AS:i:39
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 39:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 39)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 28))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr18")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 78077248)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[43252217, 43252245],
|
|
[ 21, 49]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr18 43252217 ???????????????????????????? 43252245
|
|
0 |||||||||||||||||||||||||||| 28
|
|
hg19_dna 21 ???????????????????????????? 49
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr18 43252218 0 21S28M1S * 0 0 * * AS:i:24
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 28:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 28)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 51))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr13")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 115169878)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[52759147, 52759157, 52759160, 52759198],
|
|
[ 1, 11, 11, 49]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr13 52759147 ??????????????????????????????????????????????????? 52759198
|
|
0 ||||||||||---|||||||||||||||||||||||||||||||||||||| 51
|
|
hg19_dna 1 ??????????---?????????????????????????????????????? 49
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr13 52759148 0 1S10M3D38M1S * 0 0 * * AS:i:30
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (48 aligned letters; 0 identities; 0 mismatches; 3 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 48:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 3:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 3:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 3:
|
|
open_internal_deletions = 1,
|
|
extend_internal_deletions = 2;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 3)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 3)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 3)
|
|
self.assertEqual(counts.gaps, 3)
|
|
self.assertEqual(counts.aligned, 48)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 50))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[1207056, 1207106],
|
|
[ 0, 50]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 1207056 ?????????????????????????????????????????????????? 1207106
|
|
0 |||||||||||||||||||||||||||||||||||||||||||||||||| 50
|
|
hg19_dna 0 ?????????????????????????????????????????????????? 50
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr1 1207057 0 50M * 0 0 * * AS:i:50
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (50 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 50:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 50)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 34))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[61700837, 61700871],
|
|
[ 1, 35]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 61700837 ?????????????????????????????????? 61700871
|
|
0 |||||||||||||||||||||||||||||||||| 34
|
|
hg19_dna 1 ?????????????????????????????????? 35
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 0 chr1 61700838 0 1S34M15S * 0 0 * * AS:i:22
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 34:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 34)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 38))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr4")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 191154276)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[37558157, 37558173, 37558173, 37558191],
|
|
[ 49, 33, 29, 11]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr4 37558157 ????????????????----?????????????????? 37558191
|
|
0 ||||||||||||||||----|||||||||||||||||| 38
|
|
hg19_dna 49 ?????????????????????????????????????? 11
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr4 37558158 0 1S16M4I18M11S * 0 0 * * AS:i:15
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 4 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 34:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 4:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 4:
|
|
internal_insertions = 4:
|
|
open_internal_insertions = 1,
|
|
extend_internal_insertions = 3;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 4)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 4)
|
|
self.assertEqual(counts.insertions, 4)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 4)
|
|
self.assertEqual(counts.aligned, 34)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 37))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr22")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 51304566)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[48997405, 48997442],
|
|
[ 49, 12]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr22 48997405 ????????????????????????????????????? 48997442
|
|
0 ||||||||||||||||||||||||||||||||||||| 37
|
|
hg19_dna 49 ????????????????????????????????????? 12
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr22 48997406 0 1S37M12S * 0 0 * * AS:i:29
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (37 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 37:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 37)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 36))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr2")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 243199373)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[120641740, 120641776],
|
|
[ 49, 13]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr2 120641740 ???????????????????????????????????? 120641776
|
|
0 |||||||||||||||||||||||||||||||||||| 36
|
|
hg19_dna 49 ???????????????????????????????????? 13
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr2 120641741 0 1S36M13S * 0 0 * * AS:i:32
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 39))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr19")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 59128983)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[54017130, 54017169],
|
|
[ 49, 10]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr19 54017130 ??????????????????????????????????????? 54017169
|
|
0 ||||||||||||||||||||||||||||||||||||||| 39
|
|
hg19_dna 49 ??????????????????????????????????????? 10
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr19 54017131 0 1S39M10S * 0 0 * * AS:i:39
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 39:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 39)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 39))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr19")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 59128983)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[553742, 553781],
|
|
[ 49, 10]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr19 553742 ??????????????????????????????????????? 553781
|
|
0 ||||||||||||||||||||||||||||||||||||||| 39
|
|
hg19_dna 49 ??????????????????????????????????????? 10
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr19 553743 0 1S39M10S * 0 0 * * AS:i:27
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 39:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 39)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 36))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr10")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 135534747)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[99388555, 99388591],
|
|
[ 49, 13]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr10 99388555 ???????????????????????????????????? 99388591
|
|
0 |||||||||||||||||||||||||||||||||||| 36
|
|
hg19_dna 49 ???????????????????????????????????? 13
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr10 99388556 0 1S36M13S * 0 0 * * AS:i:24
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 25))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr10")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 135534747)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[112178171, 112178196],
|
|
[ 35, 10]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr10 112178171 ????????????????????????? 112178196
|
|
0 ||||||||||||||||||||||||| 25
|
|
hg19_dna 35 ????????????????????????? 10
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr10 112178172 0 15S25M10S * 0 0 * * AS:i:21
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (25 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 25:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 25)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 36))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[39368490, 39368526],
|
|
[ 49, 13]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 39368490 ???????????????????????????????????? 39368526
|
|
0 |||||||||||||||||||||||||||||||||||| 36
|
|
hg19_dna 49 ???????????????????????????????????? 13
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr1 39368491 0 1S36M13S * 0 0 * * AS:i:32
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 36:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 36)
|
|
alignment = next(alignments)
|
|
self.assertEqual(alignment.shape, (2, 34))
|
|
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
|
|
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
|
|
self.assertEqual(len(alignment), 2)
|
|
self.assertIs(alignment.sequences[0], alignment.target)
|
|
self.assertIs(alignment.sequences[1], alignment.query)
|
|
self.assertEqual(alignment.target.id, "chr1")
|
|
self.assertEqual(alignment.query.id, "hg19_dna")
|
|
self.assertEqual(len(alignment.target.seq), 249250621)
|
|
self.assertEqual(len(alignment.query.seq), 50)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates,
|
|
# fmt: off
|
|
np.array([[220325687, 220325721],
|
|
[ 47, 13]]),
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
chr1 220325687 ?????????????????????????????????? 220325721
|
|
0 |||||||||||||||||||||||||||||||||| 34
|
|
hg19_dna 47 ?????????????????????????????????? 13
|
|
""",
|
|
)
|
|
self.assertEqual(
|
|
format(alignment, "sam"),
|
|
"""\
|
|
hg19_dna 16 chr1 220325688 0 3S34M13S * 0 0 * * AS:i:30
|
|
""",
|
|
)
|
|
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 34:
|
|
identities = 0,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 34)
|
|
self.assertRaises(StopIteration, next, alignments)
|
|
|
|
def test_reading_psl_34_005(self):
    """Test parsing psl_34_005.sam."""
    # Hand the parser output straight to the checker shared with the
    # writing test.
    self.check_alignments_psl_34_005(Align.parse("Blat/psl_34_005.sam", "sam"))
|
|
|
|
def test_writing_psl_34_005(self):
    """Test writing the alignments in psl_34_005.sam.

    The alignments parsed from the file are written to an in-memory
    stream in SAM format; the stream is then parsed again and the result
    is passed to the same checker used by the reading test.
    """
    path = "Blat/psl_34_005.sam"
    alignments = Align.parse(path, "sam")
    # The context manager closes the stream even when one of the
    # assertions below fails; a trailing close() call would be skipped.
    with StringIO() as stream:
        n = Align.write(alignments, stream, "sam")
        self.assertEqual(n, 22)
        # Rewind so the parser reads what was just written.
        stream.seek(0)
        alignments = Align.parse(stream, "sam")
        self.check_alignments_psl_34_005(alignments)
|
|
|
|
|
|
class TestAlign_sambam(unittest.TestCase):
|
|
def test_ex1(self):
|
|
alignments = Align.parse("SamBam/ex1.sam", "sam")
|
|
n = 0
|
|
for alignment in alignments:
|
|
n += 1
|
|
self.assertEqual(n, 3270)
|
|
self.assertEqual(alignment.sequences[0].id, "chr2")
|
|
self.assertEqual(alignment.sequences[1].id, "EAS114_26:7:37:79:581")
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq, "TTTTCTGGCATGAAAAAAAAAAAAAAAAAAAAAAA"
|
|
)
|
|
self.assertEqual(alignment.flag, 83)
|
|
self.assertEqual(alignment.mapq, 68)
|
|
self.assertTrue(
|
|
np.array_equal(alignment.coordinates, np.array([[1532, 1567], [35, 0]]))
|
|
)
|
|
self.assertEqual(alignment.rnext, "chr2")
|
|
self.assertEqual(alignment.pnext, 1348)
|
|
self.assertEqual(alignment.tlen, -219)
|
|
self.assertEqual(
|
|
alignment.sequences[1].letter_annotations["phred_quality"],
|
|
[
|
|
18,
|
|
11,
|
|
11,
|
|
11,
|
|
28,
|
|
28,
|
|
28,
|
|
21,
|
|
28,
|
|
28,
|
|
28,
|
|
27,
|
|
28,
|
|
28,
|
|
28,
|
|
27,
|
|
26,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
12,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
],
|
|
)
|
|
self.assertEqual(len(alignment.annotations), 6)
|
|
self.assertEqual(alignment.annotations["MF"], 18)
|
|
self.assertEqual(alignment.annotations["Aq"], 27)
|
|
self.assertEqual(alignment.annotations["NM"], 2)
|
|
self.assertEqual(alignment.annotations["UQ"], 23)
|
|
self.assertEqual(alignment.annotations["H0"], 0)
|
|
self.assertEqual(alignment.annotations["H1"], 1)
|
|
|
|
def test_ex1_header(self):
|
|
alignments = Align.parse("SamBam/ex1_header.sam", "sam")
|
|
self.assertEqual(alignments.metadata["HD"], {"VN": "1.3", "SO": "coordinate"})
|
|
self.assertEqual(len(alignments.targets), 2)
|
|
self.assertEqual(alignments.targets[0].id, "chr1")
|
|
self.assertEqual(len(alignments.targets[0].seq), 1575)
|
|
self.assertEqual(alignments.targets[1].id, "chr2")
|
|
self.assertEqual(len(alignments.targets[1].seq), 1584)
|
|
n = 0
|
|
for alignment in alignments:
|
|
n += 1
|
|
self.assertEqual(n, 3270)
|
|
self.assertEqual(alignment.sequences[0].id, "chr2")
|
|
self.assertEqual(len(alignment.sequences[0].seq), 1584)
|
|
self.assertEqual(alignment.sequences[1].id, "EAS114_26:7:37:79:581")
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq, "TTTTCTGGCATGAAAAAAAAAAAAAAAAAAAAAAA"
|
|
)
|
|
self.assertEqual(alignment.flag, 83)
|
|
self.assertEqual(alignment.mapq, 68)
|
|
self.assertTrue(
|
|
np.array_equal(alignment.coordinates, np.array([[1532, 1567], [35, 0]]))
|
|
)
|
|
self.assertEqual(alignment.rnext, "chr2")
|
|
self.assertEqual(alignment.pnext, 1348)
|
|
self.assertEqual(alignment.tlen, -219)
|
|
self.assertEqual(
|
|
alignment.sequences[1].letter_annotations["phred_quality"],
|
|
[
|
|
18,
|
|
11,
|
|
11,
|
|
11,
|
|
28,
|
|
28,
|
|
28,
|
|
21,
|
|
28,
|
|
28,
|
|
28,
|
|
27,
|
|
28,
|
|
28,
|
|
28,
|
|
27,
|
|
26,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
12,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
28,
|
|
],
|
|
)
|
|
self.assertEqual(len(alignment.annotations), 6)
|
|
self.assertEqual(alignment.annotations["MF"], 18)
|
|
self.assertEqual(alignment.annotations["Aq"], 27)
|
|
self.assertEqual(alignment.annotations["NM"], 2)
|
|
self.assertEqual(alignment.annotations["UQ"], 23)
|
|
self.assertEqual(alignment.annotations["H0"], 0)
|
|
self.assertEqual(alignment.annotations["H1"], 1)
|
|
|
|
def test_sam1(self):
|
|
alignments = Align.parse("SamBam/sam1.sam", "sam")
|
|
self.assertEqual(len(alignments.targets), 1)
|
|
self.assertEqual(alignments.targets[0].id, "1")
|
|
self.assertEqual(len(alignments.targets[0].seq), 239940)
|
|
self.assertEqual(
|
|
alignments.metadata["PG"][0],
|
|
{
|
|
"ID": "bwa",
|
|
"PN": "bwa",
|
|
"VN": "0.6.2-r126",
|
|
},
|
|
)
|
|
n = 0
|
|
for alignment in alignments:
|
|
n += 1
|
|
self.assertEqual(n, 200)
|
|
self.assertIsNone(alignment.sequences[0])
|
|
self.assertEqual(
|
|
alignment.sequences[1].id, "HWI-1KL120:88:D0LRBACXX:1:1101:5516:2195"
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq,
|
|
"GGCCCAACCGTCCTATATGAGATGTAGCATGGTACAGAACAAACTGCTTACACAGGTCTCACTAGTTAGAAACCTGTGGGCCATGGAGGTCAGACATCCAT",
|
|
)
|
|
self.assertEqual(alignment.flag, 141)
|
|
self.assertEqual(alignment.mapq, 0)
|
|
self.assertIsNone(alignment.coordinates)
|
|
self.assertEqual(
|
|
alignment.sequences[1].letter_annotations["phred_quality"],
|
|
[
|
|
33,
|
|
30,
|
|
16,
|
|
32,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
32,
|
|
37,
|
|
37,
|
|
37,
|
|
35,
|
|
36,
|
|
38,
|
|
37,
|
|
36,
|
|
38,
|
|
39,
|
|
36,
|
|
36,
|
|
35,
|
|
30,
|
|
35,
|
|
30,
|
|
36,
|
|
33,
|
|
27,
|
|
36,
|
|
38,
|
|
33,
|
|
26,
|
|
37,
|
|
29,
|
|
37,
|
|
39,
|
|
40,
|
|
29,
|
|
38,
|
|
36,
|
|
33,
|
|
39,
|
|
36,
|
|
37,
|
|
31,
|
|
31,
|
|
27,
|
|
33,
|
|
37,
|
|
29,
|
|
35,
|
|
30,
|
|
37,
|
|
27,
|
|
37,
|
|
33,
|
|
28,
|
|
34,
|
|
29,
|
|
37,
|
|
26,
|
|
34,
|
|
31,
|
|
37,
|
|
34,
|
|
22,
|
|
31,
|
|
28,
|
|
26,
|
|
28,
|
|
36,
|
|
28,
|
|
22,
|
|
28,
|
|
30,
|
|
31,
|
|
26,
|
|
26,
|
|
26,
|
|
23,
|
|
20,
|
|
21,
|
|
30,
|
|
31,
|
|
26,
|
|
26,
|
|
26,
|
|
26,
|
|
20,
|
|
20,
|
|
24,
|
|
7,
|
|
11,
|
|
11,
|
|
20,
|
|
30,
|
|
18,
|
|
29,
|
|
20,
|
|
29,
|
|
31,
|
|
],
|
|
)
|
|
|
|
def test_sam2(self):
|
|
alignments = Align.parse("SamBam/sam2.sam", "sam")
|
|
self.assertEqual(len(alignments.targets), 1)
|
|
self.assertEqual(alignments.targets[0].id, "1")
|
|
self.assertEqual(len(alignments.targets[0].seq), 239940)
|
|
self.assertEqual(
|
|
alignments.metadata["PG"][0],
|
|
{
|
|
"ID": "bwa",
|
|
"PN": "bwa",
|
|
"VN": "0.6.2-r126",
|
|
},
|
|
)
|
|
n = 0
|
|
for alignment in alignments:
|
|
if n == 8:
|
|
self.assertEqual(alignment.sequences[0].id, "1")
|
|
self.assertEqual(len(alignment.sequences[0].seq), 239940)
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq.defined_ranges, ((132615, 132716),)
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq[132615:132716],
|
|
"GGTCACACCCTGTCCTCCTCCTACACATACTCGGATGCTTCCTCCTCAACCTTGGCACCCACCTCCTTCTTACTGGGCCCAGGAGCCTTCAAAGCCCAGGA",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].id,
|
|
"HWI-1KL120:88:D0LRBACXX:1:1101:2205:2204",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq,
|
|
"TCCTGGGCATTGAAGGCTCCTGGGCCCAGTAAGAAGGAGGTGGGTGCCAAGGTTGAGGAGGAAGCATCCGAGTATGTGTAGGAGGAGGACAAGGTGGGACC",
|
|
)
|
|
self.assertEqual(alignment.flag, 83)
|
|
self.assertEqual(alignment.mapq, 60)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates, np.array([[132615, 132716], [101, 0]])
|
|
)
|
|
)
|
|
self.assertEqual(alignment.rnext, "1")
|
|
self.assertEqual(alignment.pnext, 132490)
|
|
self.assertEqual(alignment.tlen, -226)
|
|
self.assertEqual(
|
|
alignment.sequences[1].letter_annotations["phred_quality"],
|
|
[
|
|
33,
|
|
33,
|
|
33,
|
|
31,
|
|
30,
|
|
34,
|
|
29,
|
|
30,
|
|
30,
|
|
30,
|
|
34,
|
|
33,
|
|
33,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
34,
|
|
34,
|
|
29,
|
|
34,
|
|
29,
|
|
29,
|
|
34,
|
|
30,
|
|
30,
|
|
30,
|
|
28,
|
|
35,
|
|
36,
|
|
36,
|
|
36,
|
|
35,
|
|
37,
|
|
36,
|
|
35,
|
|
33,
|
|
38,
|
|
38,
|
|
39,
|
|
40,
|
|
40,
|
|
36,
|
|
36,
|
|
35,
|
|
28,
|
|
39,
|
|
37,
|
|
32,
|
|
38,
|
|
40,
|
|
40,
|
|
39,
|
|
35,
|
|
39,
|
|
38,
|
|
35,
|
|
30,
|
|
38,
|
|
40,
|
|
41,
|
|
41,
|
|
41,
|
|
40,
|
|
39,
|
|
40,
|
|
38,
|
|
37,
|
|
35,
|
|
39,
|
|
37,
|
|
40,
|
|
41,
|
|
41,
|
|
41,
|
|
40,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
39,
|
|
39,
|
|
39,
|
|
39,
|
|
37,
|
|
37,
|
|
37,
|
|
37,
|
|
37,
|
|
37,
|
|
34,
|
|
34,
|
|
34,
|
|
],
|
|
)
|
|
self.assertEqual(len(alignment.annotations), 9)
|
|
self.assertEqual(alignment.annotations["XT"], "U")
|
|
self.assertEqual(alignment.annotations["NM"], 3)
|
|
self.assertEqual(alignment.annotations["SM"], 37)
|
|
self.assertEqual(alignment.annotations["AM"], 37)
|
|
self.assertEqual(alignment.annotations["X0"], 1)
|
|
self.assertEqual(alignment.annotations["X1"], 0)
|
|
self.assertEqual(alignment.annotations["XM"], 3)
|
|
self.assertEqual(alignment.annotations["XO"], 0)
|
|
self.assertEqual(alignment.annotations["XG"], 0)
|
|
elif n == 9:
|
|
self.assertEqual(alignment.sequences[0].id, "1")
|
|
self.assertEqual(len(alignment.sequences[0].seq), 239940)
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq.defined_ranges, ((132490, 132591),)
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq[132490:132591],
|
|
"GCAACAAGGGCTTTGGTGGGAAGGTATTTGCACCTGTCATTCCTTCCTCCTTTACTCCTGCCGCCCCTTGCTGGATCCTGAGCCCCCAGGGTCCCCCGATC",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].id,
|
|
"HWI-1KL120:88:D0LRBACXX:1:1101:2205:2204",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq,
|
|
"GCAACAAGGGCTTTGGTGGGAAGGTATCTGCACCTGTCATTCCTTCCTCCTTTACTCCTGCCGCCCCTTGCTGGATCCTGAGCCCCCAGGGTCCCCCGATC",
|
|
)
|
|
self.assertEqual(alignment.flag, 163)
|
|
self.assertEqual(alignment.mapq, 60)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates, np.array([[132490, 132591], [0, 101]])
|
|
)
|
|
)
|
|
self.assertEqual(alignment.rnext, "1")
|
|
self.assertEqual(alignment.pnext, 132615)
|
|
self.assertEqual(alignment.tlen, 226)
|
|
self.assertEqual(
|
|
alignment.sequences[1].letter_annotations["phred_quality"],
|
|
[
|
|
34,
|
|
34,
|
|
34,
|
|
35,
|
|
37,
|
|
37,
|
|
37,
|
|
37,
|
|
39,
|
|
39,
|
|
39,
|
|
39,
|
|
39,
|
|
41,
|
|
41,
|
|
39,
|
|
36,
|
|
39,
|
|
40,
|
|
41,
|
|
40,
|
|
40,
|
|
40,
|
|
41,
|
|
30,
|
|
36,
|
|
39,
|
|
40,
|
|
41,
|
|
40,
|
|
40,
|
|
41,
|
|
41,
|
|
41,
|
|
38,
|
|
39,
|
|
37,
|
|
38,
|
|
34,
|
|
39,
|
|
38,
|
|
41,
|
|
41,
|
|
40,
|
|
40,
|
|
40,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
39,
|
|
40,
|
|
40,
|
|
40,
|
|
40,
|
|
40,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
40,
|
|
41,
|
|
39,
|
|
37,
|
|
37,
|
|
33,
|
|
36,
|
|
34,
|
|
36,
|
|
36,
|
|
35,
|
|
35,
|
|
33,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
29,
|
|
31,
|
|
27,
|
|
20,
|
|
24,
|
|
32,
|
|
33,
|
|
35,
|
|
35,
|
|
33,
|
|
33,
|
|
2,
|
|
2,
|
|
2,
|
|
],
|
|
)
|
|
self.assertEqual(len(alignment.annotations), 9)
|
|
self.assertEqual(alignment.annotations["XT"], "U")
|
|
self.assertEqual(alignment.annotations["NM"], 1)
|
|
self.assertEqual(alignment.annotations["SM"], 37)
|
|
self.assertEqual(alignment.annotations["AM"], 37)
|
|
self.assertEqual(alignment.annotations["X0"], 1)
|
|
self.assertEqual(alignment.annotations["X1"], 0)
|
|
self.assertEqual(alignment.annotations["XM"], 1)
|
|
self.assertEqual(alignment.annotations["XO"], 0)
|
|
self.assertEqual(alignment.annotations["XG"], 0)
|
|
elif n == 100:
|
|
self.assertEqual(alignment.sequences[0].id, "1")
|
|
self.assertEqual(len(alignment.sequences[0].seq), 239940)
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq.defined_ranges, ((137538, 137639),)
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq[137538:137639],
|
|
"AAAGTTCGGGGCCTACAAAGGCGGTTGGGAGCTGGGCAGGAGTTGAGCCAAAAGAGCTTGCTTACTTGCTGGGAGGCAGGGCCGGGAGAGCCCGACTTCAG",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].id,
|
|
"HWI-1KL120:88:D0LRBACXX:1:1101:4673:2125",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq,
|
|
"AAAGTTCGGGGCCTACAAAGGCGGTTGGGAGCTGGGCAGGAGTTGAGCCAAAAGAGCTTGCTTACTTGCTGGGAGGCAGGACCGGGAGAGGCCGACTTCAG",
|
|
)
|
|
self.assertEqual(alignment.flag, 97)
|
|
self.assertEqual(alignment.mapq, 37)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates, np.array([[137538, 137639], [0, 101]])
|
|
)
|
|
)
|
|
self.assertEqual(alignment.rnext, "1")
|
|
self.assertEqual(alignment.pnext, 135649)
|
|
self.assertEqual(alignment.tlen, -1788)
|
|
self.assertEqual(
|
|
alignment.sequences[1].letter_annotations["phred_quality"],
|
|
[
|
|
34,
|
|
34,
|
|
34,
|
|
37,
|
|
37,
|
|
37,
|
|
37,
|
|
37,
|
|
39,
|
|
39,
|
|
39,
|
|
39,
|
|
39,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
38,
|
|
39,
|
|
38,
|
|
40,
|
|
41,
|
|
40,
|
|
40,
|
|
40,
|
|
41,
|
|
41,
|
|
40,
|
|
41,
|
|
40,
|
|
39,
|
|
39,
|
|
39,
|
|
37,
|
|
37,
|
|
34,
|
|
34,
|
|
34,
|
|
36,
|
|
36,
|
|
34,
|
|
36,
|
|
36,
|
|
35,
|
|
35,
|
|
33,
|
|
35,
|
|
35,
|
|
35,
|
|
34,
|
|
35,
|
|
35,
|
|
35,
|
|
34,
|
|
35,
|
|
34,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
30,
|
|
33,
|
|
31,
|
|
33,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
33,
|
|
35,
|
|
35,
|
|
35,
|
|
33,
|
|
29,
|
|
31,
|
|
35,
|
|
33,
|
|
31,
|
|
34,
|
|
34,
|
|
35,
|
|
],
|
|
)
|
|
self.assertEqual(len(alignment.annotations), 9)
|
|
self.assertEqual(alignment.annotations["XT"], "U")
|
|
self.assertEqual(alignment.annotations["NM"], 2)
|
|
self.assertEqual(alignment.annotations["SM"], 37)
|
|
self.assertEqual(alignment.annotations["AM"], 37)
|
|
self.assertEqual(alignment.annotations["X0"], 1)
|
|
self.assertEqual(alignment.annotations["X1"], 0)
|
|
self.assertEqual(alignment.annotations["XM"], 2)
|
|
self.assertEqual(alignment.annotations["XO"], 0)
|
|
self.assertEqual(alignment.annotations["XG"], 0)
|
|
elif n == 101:
|
|
self.assertEqual(alignment.sequences[0].id, "1")
|
|
self.assertEqual(len(alignment.sequences[0].seq), 239940)
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq.defined_ranges, ((135649, 135750),)
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[0].seq[135649:135750],
|
|
"TGGAGAGGCCACCGCGAGGCCTGAGCTGGGCCTGGGGAGCTTGGCTTAGGGAAGTTGTGGGCCTACCAGGGCCGCTGGGAGCTGGGCAGGAGCTGAGTCCA",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].id,
|
|
"HWI-1KL120:88:D0LRBACXX:1:1101:4673:2125",
|
|
)
|
|
self.assertEqual(
|
|
alignment.sequences[1].seq,
|
|
"TGGACTCAGCTCCTGCCCAGCTCCCAGCGGCCCTGGTAGGCCCACAACTTCCCGAAGCCAAGCTCCCCAGGCCCAGCTCAGGCCTCACGGTGGCCTCTCCA",
|
|
)
|
|
self.assertEqual(alignment.flag, 145)
|
|
self.assertEqual(alignment.mapq, 37)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
alignment.coordinates, np.array([[135649, 135750], [101, 0]])
|
|
)
|
|
)
|
|
self.assertEqual(alignment.rnext, "1")
|
|
self.assertEqual(alignment.pnext, 137538)
|
|
self.assertEqual(alignment.tlen, 1788)
|
|
self.assertEqual(
|
|
alignment.sequences[1].letter_annotations["phred_quality"],
|
|
[
|
|
34,
|
|
34,
|
|
34,
|
|
32,
|
|
33,
|
|
34,
|
|
32,
|
|
32,
|
|
33,
|
|
33,
|
|
31,
|
|
33,
|
|
33,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
33,
|
|
35,
|
|
34,
|
|
35,
|
|
34,
|
|
35,
|
|
35,
|
|
33,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
33,
|
|
30,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
35,
|
|
34,
|
|
33,
|
|
35,
|
|
36,
|
|
34,
|
|
36,
|
|
35,
|
|
37,
|
|
37,
|
|
39,
|
|
37,
|
|
39,
|
|
40,
|
|
40,
|
|
41,
|
|
41,
|
|
40,
|
|
41,
|
|
41,
|
|
41,
|
|
40,
|
|
41,
|
|
41,
|
|
40,
|
|
40,
|
|
39,
|
|
39,
|
|
38,
|
|
41,
|
|
40,
|
|
41,
|
|
40,
|
|
41,
|
|
40,
|
|
41,
|
|
40,
|
|
38,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
40,
|
|
41,
|
|
40,
|
|
40,
|
|
41,
|
|
41,
|
|
41,
|
|
41,
|
|
39,
|
|
39,
|
|
39,
|
|
39,
|
|
39,
|
|
37,
|
|
37,
|
|
37,
|
|
37,
|
|
37,
|
|
34,
|
|
34,
|
|
34,
|
|
],
|
|
)
|
|
self.assertEqual(len(alignment.annotations), 9)
|
|
self.assertEqual(alignment.annotations["XT"], "U")
|
|
self.assertEqual(alignment.annotations["NM"], 2)
|
|
self.assertEqual(alignment.annotations["SM"], 37)
|
|
self.assertEqual(alignment.annotations["AM"], 37)
|
|
self.assertEqual(alignment.annotations["X0"], 1)
|
|
self.assertEqual(alignment.annotations["X1"], 0)
|
|
self.assertEqual(alignment.annotations["XM"], 2)
|
|
self.assertEqual(alignment.annotations["XO"], 0)
|
|
self.assertEqual(alignment.annotations["XG"], 0)
|
|
else:
|
|
self.assertIsNone(alignment.sequences[0])
|
|
self.assertEqual(alignment.mapq, 0)
|
|
self.assertIsNone(alignment.coordinates)
|
|
n += 1
|
|
self.assertEqual(n, 200)
|
|
|
|
|
|
class TestAlign_clipping(unittest.TestCase):
|
|
def test_6M(self):
|
|
"""Test alignment starting at non-zero position."""
|
|
# Build the alignment by hand: the first coordinates row gives the target
# positions (8 to 14), the second row the query positions (0 to 6).
target_seq = Seq("AAAAAAAACCCCCC")
|
|
query_seq = Seq("CCCCCC")
|
|
target = SeqRecord(target_seq, id="target")
|
|
query = SeqRecord(query_seq, id="query")
|
|
sequences = [target, query]
|
|
coordinates = np.array([[8, 14], [0, 6]])
|
|
alignment = Alignment(sequences, coordinates)
|
|
# NOTE(review): the column spacing inside the expected string below looks
# collapsed -- confirm against the real printed alignment.
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
target 8 CCCCCC 14
|
|
0 |||||| 6
|
|
query 0 CCCCCC 6
|
|
""",
|
|
)
|
|
# Format as a single SAM line and check it in full.
line = alignment.format("sam")
|
|
self.assertEqual(line, "query\t0\ttarget\t9\t255\t6M\t*\t0\t0\tCCCCCC\t*\n")
|
|
fields = line.split()
|
|
# The position field (fields[3]) minus one must equal the target start.
pos = int(fields[3]) - 1
|
|
self.assertEqual(pos, 8)
|
|
cigar = fields[5]
|
|
self.assertEqual(cigar, "6M")
|
|
# Set up a parser on the SAM line; the record is only read back at the
# end of the test through next(alignments).
stream = StringIO(line)
|
|
alignments = Align.parse(stream, "sam")
|
|
# Compare the alignment, converted to an array of letters, row by row.
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['C', 'C', 'C', 'C', 'C', 'C'],
|
|
['C', 'C', 'C', 'C', 'C', 'C']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
# Check the repr (which ends in the hexadecimal id of the object), the
# str, and the individual attributes of the counts object.
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 6:
|
|
identities = 6,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 6)
|
|
self.assertEqual(counts.identities, 6)
|
|
self.assertEqual(counts.mismatches, 0)
|
|
# Round trip: the alignment parsed back from the SAM line must have the
# same coordinates as the hand-built one.
alignment = next(alignments)
|
|
stream.close()
|
|
self.assertTrue(np.array_equal(alignment.coordinates, coordinates))
|
|
|
|
def test_8D6M_ex1(self):
|
|
"""Test alignment starting with deletion."""
|
|
# Build the alignment by hand. In the first coordinates step only the
# target advances (0 to 8) while the query stays at 0; the test expects
# this to be reported as 8 left deletions and as "8D" in the CIGAR.
target_seq = Seq("AAAAAAAACCCCCC")
|
|
query_seq = Seq("CCCCCC")
|
|
target = SeqRecord(target_seq, id="target")
|
|
query = SeqRecord(query_seq, id="query")
|
|
sequences = [target, query]
|
|
coordinates = np.array([[0, 8, 14], [0, 0, 6]])
|
|
alignment = Alignment(sequences, coordinates)
|
|
# NOTE(review): the column spacing inside the expected string below looks
# collapsed -- confirm against the real printed alignment.
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
target 0 AAAAAAAACCCCCC 14
|
|
0 --------|||||| 14
|
|
query 0 --------CCCCCC 6
|
|
""",
|
|
)
|
|
# Format as a single SAM line and check it in full.
line = alignment.format("sam")
|
|
self.assertEqual(line, "query\t0\ttarget\t1\t255\t8D6M\t*\t0\t0\tCCCCCC\t*\n")
|
|
fields = line.split()
|
|
# The position field (fields[3]) minus one must equal the target start.
pos = int(fields[3]) - 1
|
|
self.assertEqual(pos, 0)
|
|
cigar = fields[5]
|
|
self.assertEqual(cigar, "8D6M")
|
|
# Set up a parser on the SAM line; the record is only read back at the
# end of the test through next(alignments).
stream = StringIO(line)
|
|
alignments = Align.parse(stream, "sam")
|
|
# Compare the alignment, converted to an array of letters, row by row.
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'C', 'C', 'C', 'C',
|
|
'C'],
|
|
['-', '-', '-', '-', '-', '-', '-', '-', 'C', 'C', 'C', 'C', 'C',
|
|
'C']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
# Check the repr (which ends in the hexadecimal id of the object), the
# str, and the individual attributes of the counts object.
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 8 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 6:
|
|
identities = 6,
|
|
mismatches = 0.
|
|
gaps = 8:
|
|
left_gaps = 8:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 8:
|
|
open_left_deletions = 1,
|
|
extend_left_deletions = 7;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 8)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 8)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 8)
|
|
self.assertEqual(counts.gaps, 8)
|
|
self.assertEqual(counts.aligned, 6)
|
|
self.assertEqual(counts.identities, 6)
|
|
self.assertEqual(counts.mismatches, 0)
|
|
# Round trip: the alignment parsed back from the SAM line must have the
# same coordinates as the hand-built one.
alignment = next(alignments)
|
|
stream.close()
|
|
self.assertTrue(np.array_equal(alignment.coordinates, coordinates))
|
|
|
|
def test_8D6M_ex2(self):
|
|
"""Test alignment starting with deletion at non-zero position."""
|
|
# Build the alignment by hand. The target starts at position 4; in the
# first coordinates step only the target advances (4 to 12) while the
# query stays at 0, which the test expects as "8D" in the CIGAR.
target_seq = Seq("GGGGAAAAAAAACCCCCC")
|
|
query_seq = Seq("CCCCCC")
|
|
target = SeqRecord(target_seq, id="target")
|
|
query = SeqRecord(query_seq, id="query")
|
|
sequences = [target, query]
|
|
coordinates = np.array([[4, 12, 18], [0, 0, 6]])
|
|
alignment = Alignment(sequences, coordinates)
|
|
# NOTE(review): the column spacing inside the expected string below looks
# collapsed -- confirm against the real printed alignment.
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
target 4 AAAAAAAACCCCCC 18
|
|
0 --------|||||| 14
|
|
query 0 --------CCCCCC 6
|
|
""",
|
|
)
|
|
# Format as a single SAM line and check it in full.
line = alignment.format("sam")
|
|
self.assertEqual(line, "query\t0\ttarget\t5\t255\t8D6M\t*\t0\t0\tCCCCCC\t*\n")
|
|
fields = line.split()
|
|
# The position field (fields[3]) minus one must equal the target start.
pos = int(fields[3]) - 1
|
|
self.assertEqual(pos, 4)
|
|
cigar = fields[5]
|
|
self.assertEqual(cigar, "8D6M")
|
|
# Set up a parser on the SAM line; the record is only read back at the
# end of the test through next(alignments).
stream = StringIO(line)
|
|
alignments = Align.parse(stream, "sam")
|
|
# Compare the alignment, converted to an array of letters, row by row.
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'C', 'C', 'C', 'C',
|
|
'C'],
|
|
['-', '-', '-', '-', '-', '-', '-', '-', 'C', 'C', 'C', 'C', 'C',
|
|
'C']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
# Check the repr (which ends in the hexadecimal id of the object), the
# str, and the individual attributes of the counts object.
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 8 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 6:
|
|
identities = 6,
|
|
mismatches = 0.
|
|
gaps = 8:
|
|
left_gaps = 8:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 8:
|
|
open_left_deletions = 1,
|
|
extend_left_deletions = 7;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 8)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 8)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 8)
|
|
self.assertEqual(counts.gaps, 8)
|
|
self.assertEqual(counts.aligned, 6)
|
|
self.assertEqual(counts.identities, 6)
|
|
self.assertEqual(counts.mismatches, 0)
|
|
# Round trip: the alignment parsed back from the SAM line must have the
# same coordinates as the hand-built one.
alignment = next(alignments)
|
|
stream.close()
|
|
self.assertTrue(np.array_equal(alignment.coordinates, coordinates))
|
|
|
|
def test_8I6M_ex1(self):
|
|
"""Test alignment starting with insertion."""
|
|
# Build the alignment by hand. In the first coordinates step only the
# query advances (0 to 8) while the target stays at 0; the test expects
# this to be reported as 8 left insertions and as "8I" in the CIGAR.
target_seq = Seq("CCCCCC")
|
|
query_seq = Seq("AAAAAAAACCCCCC")
|
|
target = SeqRecord(target_seq, id="target")
|
|
query = SeqRecord(query_seq, id="query")
|
|
sequences = [target, query]
|
|
coordinates = np.array([[0, 0, 6], [0, 8, 14]])
|
|
alignment = Alignment(sequences, coordinates)
|
|
# NOTE(review): the column spacing inside the expected string below looks
# collapsed -- confirm against the real printed alignment.
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
target 0 --------CCCCCC 6
|
|
0 --------|||||| 14
|
|
query 0 AAAAAAAACCCCCC 14
|
|
""",
|
|
)
|
|
# Format as a single SAM line and check it in full.
line = alignment.format("sam")
|
|
self.assertEqual(
|
|
line, "query\t0\ttarget\t1\t255\t8I6M\t*\t0\t0\tAAAAAAAACCCCCC\t*\n"
|
|
)
|
|
fields = line.split()
|
|
# The position field (fields[3]) minus one must equal the target start.
pos = int(fields[3]) - 1
|
|
self.assertEqual(pos, 0)
|
|
cigar = fields[5]
|
|
self.assertEqual(cigar, "8I6M")
|
|
# Set up a parser on the SAM line; the record is only read back at the
# end of the test through next(alignments).
stream = StringIO(line)
|
|
alignments = Align.parse(stream, "sam")
|
|
# Compare the alignment, converted to an array of letters, row by row.
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['-', '-', '-', '-', '-', '-', '-', '-', 'C', 'C', 'C', 'C', 'C',
|
|
'C'],
|
|
['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'C', 'C', 'C', 'C',
|
|
'C']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
# Check the repr (which ends in the hexadecimal id of the object), the
# str, and the individual attributes of the counts object.
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 8 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 6:
|
|
identities = 6,
|
|
mismatches = 0.
|
|
gaps = 8:
|
|
left_gaps = 8:
|
|
left_insertions = 8:
|
|
open_left_insertions = 1,
|
|
extend_left_insertions = 7;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 8)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 8)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 8)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 8)
|
|
self.assertEqual(counts.aligned, 6)
|
|
self.assertEqual(counts.identities, 6)
|
|
self.assertEqual(counts.mismatches, 0)
|
|
# Round trip: the alignment parsed back from the SAM line must have the
# same coordinates as the hand-built one.
alignment = next(alignments)
|
|
stream.close()
|
|
self.assertTrue(np.array_equal(alignment.coordinates, coordinates))
|
|
|
|
def test_8I6M_ex2(self):
|
|
"""Test alignment starting with insertion at non-zero position."""
|
|
# Build the alignment by hand. The target starts at position 4; in the
# first coordinates step only the query advances (0 to 8) while the
# target stays at 4, which the test expects as "8I" in the CIGAR.
target_seq = Seq("GGGGCCCCCC")
|
|
query_seq = Seq("AAAAAAAACCCCCC")
|
|
target = SeqRecord(target_seq, id="target")
|
|
query = SeqRecord(query_seq, id="query")
|
|
sequences = [target, query]
|
|
coordinates = np.array([[4, 4, 10], [0, 8, 14]])
|
|
alignment = Alignment(sequences, coordinates)
|
|
# NOTE(review): the column spacing inside the expected string below looks
# collapsed -- confirm against the real printed alignment.
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
target 4 --------CCCCCC 10
|
|
0 --------|||||| 14
|
|
query 0 AAAAAAAACCCCCC 14
|
|
""",
|
|
)
|
|
# Format as a single SAM line and check it in full.
line = alignment.format("sam")
|
|
self.assertEqual(
|
|
line, "query\t0\ttarget\t5\t255\t8I6M\t*\t0\t0\tAAAAAAAACCCCCC\t*\n"
|
|
)
|
|
fields = line.split()
|
|
# The position field (fields[3]) minus one must equal the target start.
pos = int(fields[3]) - 1
|
|
self.assertEqual(pos, 4)
|
|
cigar = fields[5]
|
|
self.assertEqual(cigar, "8I6M")
|
|
# Set up a parser on the SAM line; the record is only read back at the
# end of the test through next(alignments).
stream = StringIO(line)
|
|
alignments = Align.parse(stream, "sam")
|
|
# Compare the alignment, converted to an array of letters, row by row.
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['-', '-', '-', '-', '-', '-', '-', '-', 'C', 'C', 'C', 'C', 'C',
|
|
'C'],
|
|
['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'C', 'C', 'C', 'C',
|
|
'C']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
# Check the repr (which ends in the hexadecimal id of the object), the
# str, and the individual attributes of the counts object.
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 8 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 6:
|
|
identities = 6,
|
|
mismatches = 0.
|
|
gaps = 8:
|
|
left_gaps = 8:
|
|
left_insertions = 8:
|
|
open_left_insertions = 1,
|
|
extend_left_insertions = 7;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 8)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 8)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 8)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 8)
|
|
self.assertEqual(counts.aligned, 6)
|
|
self.assertEqual(counts.identities, 6)
|
|
self.assertEqual(counts.mismatches, 0)
|
|
# Round trip: the alignment parsed back from the SAM line must have the
# same coordinates as the hand-built one.
alignment = next(alignments)
|
|
stream.close()
|
|
self.assertTrue(np.array_equal(alignment.coordinates, coordinates))
|
|
|
|
def test_8S6M(self):
|
|
"""Test alignment starting with soft clip."""
|
|
# Build the alignment by hand. The query row of the coordinates starts at
# 8, so the first 8 query letters are outside the aligned region; the
# test expects them as "8S" in the CIGAR and expects zero gaps.
target_seq = Seq("CCCCCC")
|
|
query_seq = Seq("AAAAAAAACCCCCC")
|
|
target = SeqRecord(target_seq, id="target")
|
|
query = SeqRecord(query_seq, id="query")
|
|
sequences = [target, query]
|
|
coordinates = np.array([[0, 6], [8, 14]])
|
|
alignment = Alignment(sequences, coordinates)
|
|
# NOTE(review): the column spacing inside the expected string below looks
# collapsed -- confirm against the real printed alignment.
self.assertEqual(
|
|
str(alignment),
|
|
"""\
|
|
target 0 CCCCCC 6
|
|
0 |||||| 6
|
|
query 8 CCCCCC 14
|
|
""",
|
|
)
|
|
# Format as a single SAM line and check it in full; the line carries the
# whole query sequence, including the 8 leading letters.
line = alignment.format("sam")
|
|
self.assertEqual(
|
|
line, "query\t0\ttarget\t1\t255\t8S6M\t*\t0\t0\tAAAAAAAACCCCCC\t*\n"
|
|
)
|
|
fields = line.split()
|
|
# The position field (fields[3]) minus one must equal the target start.
pos = int(fields[3]) - 1
|
|
self.assertEqual(pos, 0)
|
|
cigar = fields[5]
|
|
self.assertEqual(cigar, "8S6M")
|
|
# Set up a parser on the SAM line; the record is only read back at the
# end of the test through next(alignments).
stream = StringIO(line)
|
|
alignments = Align.parse(stream, "sam")
|
|
# Compare the alignment, converted to an array of letters, row by row.
self.assertTrue(
|
|
np.array_equal(
|
|
np.array(alignment, "U"),
|
|
# fmt: off
|
|
np.array([['C', 'C', 'C', 'C', 'C', 'C'],
|
|
['C', 'C', 'C', 'C', 'C', 'C']], dtype='U')
|
|
# fmt: on
|
|
)
|
|
)
|
|
# Check the repr (which ends in the hexadecimal id of the object), the
# str, and the individual attributes of the counts object.
counts = alignment.counts()
|
|
self.assertEqual(
|
|
repr(counts),
|
|
"<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 0 gaps) at 0x%x>"
|
|
% id(counts),
|
|
)
|
|
self.assertEqual(
|
|
str(counts),
|
|
"""\
|
|
AlignmentCounts object with
|
|
aligned = 6:
|
|
identities = 6,
|
|
mismatches = 0.
|
|
gaps = 0:
|
|
left_gaps = 0:
|
|
left_insertions = 0:
|
|
open_left_insertions = 0,
|
|
extend_left_insertions = 0;
|
|
left_deletions = 0:
|
|
open_left_deletions = 0,
|
|
extend_left_deletions = 0;
|
|
internal_gaps = 0:
|
|
internal_insertions = 0:
|
|
open_internal_insertions = 0,
|
|
extend_internal_insertions = 0;
|
|
internal_deletions = 0:
|
|
open_internal_deletions = 0,
|
|
extend_internal_deletions = 0;
|
|
right_gaps = 0:
|
|
right_insertions = 0:
|
|
open_right_insertions = 0,
|
|
extend_right_insertions = 0;
|
|
right_deletions = 0:
|
|
open_right_deletions = 0,
|
|
extend_right_deletions = 0.
|
|
""",
|
|
)
|
|
self.assertEqual(counts.left_insertions, 0)
|
|
self.assertEqual(counts.left_deletions, 0)
|
|
self.assertEqual(counts.right_insertions, 0)
|
|
self.assertEqual(counts.right_deletions, 0)
|
|
self.assertEqual(counts.internal_insertions, 0)
|
|
self.assertEqual(counts.internal_deletions, 0)
|
|
self.assertEqual(counts.left_gaps, 0)
|
|
self.assertEqual(counts.right_gaps, 0)
|
|
self.assertEqual(counts.internal_gaps, 0)
|
|
self.assertEqual(counts.insertions, 0)
|
|
self.assertEqual(counts.deletions, 0)
|
|
self.assertEqual(counts.gaps, 0)
|
|
self.assertEqual(counts.aligned, 6)
|
|
self.assertEqual(counts.identities, 6)
|
|
self.assertEqual(counts.mismatches, 0)
|
|
# Round trip: the alignment parsed back from the SAM line must have the
# same coordinates as the hand-built one.
alignment = next(alignments)
|
|
stream.close()
|
|
self.assertTrue(np.array_equal(alignment.coordinates, coordinates))
|
|
|
|
def test_4S8D6M(self):
    """Test alignment starting with soft clip followed by deletion.

    The query coordinates start at 4, so the first four query letters are
    outside the alignment (written as ``4S`` in the CIGAR string).  The
    first eight target letters face a gap (``8D``) and the final six
    letters of both sequences are aligned (``6M``).  The test checks the
    printed alignment, the SAM line, the letter array, and the gap counts,
    and finally parses the SAM line back to confirm that the coordinates
    survive the round trip.
    """
    # NOTE(review): the multi-line expected texts below are reproduced as
    # they appear in this copy of the file, where runs of whitespace look
    # collapsed; confirm the column padding against the real output.
    target_seq = Seq("AAAAAAAACCCCCC")
    query_seq = Seq("GGGGCCCCCC")
    target = SeqRecord(target_seq, id="target")
    query = SeqRecord(query_seq, id="query")
    sequences = [target, query]
    coordinates = np.array([[0, 8, 14], [4, 4, 10]])
    alignment = Alignment(sequences, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 0 AAAAAAAACCCCCC 14
0 --------|||||| 14
query 4 --------CCCCCC 10
""",
    )
    line = alignment.format("sam")
    self.assertEqual(
        line, "query\t0\ttarget\t1\t255\t4S8D6M\t*\t0\t0\tGGGGCCCCCC\t*\n"
    )
    fields = line.split()
    # The fourth SAM field is decremented here to get a zero-based position.
    pos = int(fields[3]) - 1
    self.assertEqual(pos, 0)
    cigar = fields[5]
    self.assertEqual(cigar, "4S8D6M")
    self.assertTrue(
        np.array_equal(
            np.array(alignment, "U"),
            # fmt: off
            np.array([['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'C', 'C', 'C', 'C',
                       'C'],
                      ['-', '-', '-', '-', '-', '-', '-', '-', 'C', 'C', 'C', 'C', 'C',
                       'C']], dtype='U')
            # fmt: on
        )
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 8 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 8:
left_gaps = 8:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 8:
open_left_deletions = 1,
extend_left_deletions = 7;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 8)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 0)
    self.assertEqual(counts.internal_deletions, 0)
    self.assertEqual(counts.left_gaps, 8)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 0)
    self.assertEqual(counts.insertions, 0)
    self.assertEqual(counts.deletions, 8)
    self.assertEqual(counts.gaps, 8)
    self.assertEqual(counts.aligned, 6)
    self.assertEqual(counts.identities, 6)
    self.assertEqual(counts.mismatches, 0)
    # Round trip: parse the SAM line back.  The context manager closes the
    # stream even if parsing raises, and the parsed alignment gets its own
    # name so the alignment built above is not shadowed.
    with StringIO(line) as stream:
        alignments = Align.parse(stream, "sam")
        parsed = next(alignments)
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
|
|
|
|
def test_4I8D6M(self):
    """Test alignment starting with insertion followed by deletion.

    The first four query letters face a gap in the target (``4I`` in the
    CIGAR string), the first eight target letters face a gap in the query
    (``8D``), and the final six letters of both sequences are aligned
    (``6M``).  The test checks the printed alignment, the SAM line, the
    letter array, and the gap counts, and finally parses the SAM line back
    to confirm that the coordinates survive the round trip.
    """
    # NOTE(review): the multi-line expected texts below are reproduced as
    # they appear in this copy of the file, where runs of whitespace look
    # collapsed; confirm the column padding against the real output.
    target_seq = Seq("AAAAAAAACCCCCC")
    query_seq = Seq("GGGGCCCCCC")
    target = SeqRecord(target_seq, id="target")
    query = SeqRecord(query_seq, id="query")
    sequences = [target, query]
    coordinates = np.array([[0, 0, 8, 14], [0, 4, 4, 10]])
    alignment = Alignment(sequences, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 0 ----AAAAAAAACCCCCC 14
0 ------------|||||| 18
query 0 GGGG--------CCCCCC 10
""",
    )
    line = alignment.format("sam")
    self.assertEqual(
        line, "query\t0\ttarget\t1\t255\t4I8D6M\t*\t0\t0\tGGGGCCCCCC\t*\n"
    )
    fields = line.split()
    # The fourth SAM field is decremented here to get a zero-based position.
    pos = int(fields[3]) - 1
    self.assertEqual(pos, 0)
    cigar = fields[5]
    self.assertEqual(cigar, "4I8D6M")
    self.assertTrue(
        np.array_equal(
            np.array(alignment, "U"),
            # fmt: off
            np.array([['-', '-', '-', '-', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C',
                       'C', 'C', 'C', 'C', 'C'],
                      ['G', 'G', 'G', 'G', '-', '-', '-', '-', '-', '-', '-', '-', 'C',
                       'C', 'C', 'C', 'C', 'C']], dtype='U')
            # fmt: on
        )
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 12 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 12:
left_gaps = 4:
left_insertions = 4:
open_left_insertions = 1,
extend_left_insertions = 3;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 8:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 8:
open_internal_deletions = 1,
extend_internal_deletions = 7;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 4)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 0)
    self.assertEqual(counts.internal_deletions, 8)
    self.assertEqual(counts.left_gaps, 4)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 8)
    self.assertEqual(counts.insertions, 4)
    self.assertEqual(counts.deletions, 8)
    self.assertEqual(counts.gaps, 12)
    self.assertEqual(counts.aligned, 6)
    self.assertEqual(counts.identities, 6)
    self.assertEqual(counts.mismatches, 0)
    # Round trip: parse the SAM line back.  The context manager closes the
    # stream even if parsing raises, and the parsed alignment gets its own
    # name so the alignment built above is not shadowed.
    with StringIO(line) as stream:
        alignments = Align.parse(stream, "sam")
        parsed = next(alignments)
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
|
|
|
|
def test_4S6M(self):
    """Test alignment starting with soft clip at non-zero position.

    The alignment covers target positions 8 to 14 and query positions 4 to
    10, so the SAM line is expected to report position 9 and the CIGAR
    string ``4S6M``.  The test checks the printed alignment, the SAM line,
    the letter array, and the gap counts (all zero), and finally parses
    the SAM line back to confirm that the coordinates survive the round
    trip.
    """
    # NOTE(review): the multi-line expected texts below are reproduced as
    # they appear in this copy of the file, where runs of whitespace look
    # collapsed; confirm the column padding against the real output.
    target_seq = Seq("AAAAAAAACCCCCC")
    query_seq = Seq("GGGGCCCCCC")
    target = SeqRecord(target_seq, id="target")
    query = SeqRecord(query_seq, id="query")
    sequences = [target, query]
    coordinates = np.array([[8, 14], [4, 10]])
    alignment = Alignment(sequences, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 8 CCCCCC 14
0 |||||| 6
query 4 CCCCCC 10
""",
    )
    line = alignment.format("sam")
    self.assertEqual(
        line, "query\t0\ttarget\t9\t255\t4S6M\t*\t0\t0\tGGGGCCCCCC\t*\n"
    )
    fields = line.split()
    # The fourth SAM field is decremented here to get a zero-based position.
    pos = int(fields[3]) - 1
    self.assertEqual(pos, 8)
    cigar = fields[5]
    self.assertEqual(cigar, "4S6M")
    self.assertTrue(
        np.array_equal(
            np.array(alignment, "U"),
            # fmt: off
            np.array([['C', 'C', 'C', 'C', 'C', 'C'],
                      ['C', 'C', 'C', 'C', 'C', 'C']], dtype='U')
            # fmt: on
        )
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 0 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 0)
    self.assertEqual(counts.internal_deletions, 0)
    self.assertEqual(counts.left_gaps, 0)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 0)
    self.assertEqual(counts.insertions, 0)
    self.assertEqual(counts.deletions, 0)
    self.assertEqual(counts.gaps, 0)
    self.assertEqual(counts.aligned, 6)
    self.assertEqual(counts.identities, 6)
    self.assertEqual(counts.mismatches, 0)
    # Round trip: parse the SAM line back.  The context manager closes the
    # stream even if parsing raises, and the parsed alignment gets its own
    # name so the alignment built above is not shadowed.
    with StringIO(line) as stream:
        alignments = Align.parse(stream, "sam")
        parsed = next(alignments)
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
|
|
|
|
def test_4D8I6M(self):
    """Test alignment starting with deletion followed by insertion.

    The first four target letters face a gap in the query (``4D`` in the
    CIGAR string), the first eight query letters face a gap in the target
    (``8I``), and the final six letters of both sequences are aligned
    (``6M``).  The test checks the printed alignment, the SAM line, the
    letter array, and the gap counts, and finally parses the SAM line back
    to confirm that the coordinates survive the round trip.
    """
    # NOTE(review): the multi-line expected texts below are reproduced as
    # they appear in this copy of the file, where runs of whitespace look
    # collapsed; confirm the column padding against the real output.
    target_seq = Seq("GGGGCCCCCC")
    query_seq = Seq("AAAAAAAACCCCCC")
    target = SeqRecord(target_seq, id="target")
    query = SeqRecord(query_seq, id="query")
    sequences = [target, query]
    coordinates = np.array([[0, 4, 4, 10], [0, 0, 8, 14]])
    alignment = Alignment(sequences, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 0 GGGG--------CCCCCC 10
0 ------------|||||| 18
query 0 ----AAAAAAAACCCCCC 14
""",
    )
    line = alignment.format("sam")
    self.assertEqual(
        line, "query\t0\ttarget\t1\t255\t4D8I6M\t*\t0\t0\tAAAAAAAACCCCCC\t*\n"
    )
    fields = line.split()
    # The fourth SAM field is decremented here to get a zero-based position.
    pos = int(fields[3]) - 1
    self.assertEqual(pos, 0)
    cigar = fields[5]
    self.assertEqual(cigar, "4D8I6M")
    self.assertTrue(
        np.array_equal(
            np.array(alignment, "U"),
            # fmt: off
            np.array([['G', 'G', 'G', 'G', '-', '-', '-', '-', '-', '-', '-', '-', 'C',
                       'C', 'C', 'C', 'C', 'C'],
                      ['-', '-', '-', '-', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C',
                       'C', 'C', 'C', 'C', 'C']], dtype='U')
            # fmt: on
        )
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 12 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 12:
left_gaps = 4:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 4:
open_left_deletions = 1,
extend_left_deletions = 3;
internal_gaps = 8:
internal_insertions = 8:
open_internal_insertions = 1,
extend_internal_insertions = 7;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 4)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 8)
    self.assertEqual(counts.internal_deletions, 0)
    self.assertEqual(counts.left_gaps, 4)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 8)
    self.assertEqual(counts.insertions, 8)
    self.assertEqual(counts.deletions, 4)
    self.assertEqual(counts.gaps, 12)
    self.assertEqual(counts.aligned, 6)
    self.assertEqual(counts.identities, 6)
    self.assertEqual(counts.mismatches, 0)
    # Round trip: parse the SAM line back.  The context manager closes the
    # stream even if parsing raises, and the parsed alignment gets its own
    # name so the alignment built above is not shadowed.
    with StringIO(line) as stream:
        alignments = Align.parse(stream, "sam")
        parsed = next(alignments)
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
|
|
|
|
def test_4S8I6M(self):
    """Test alignment starting with soft clip followed by insertion.

    The query coordinates start at 4, so the first four query letters are
    outside the alignment (written as ``4S`` in the CIGAR string).  The
    next eight query letters face a gap in the target (``8I``) and the
    final six letters of both sequences are aligned (``6M``).  The test
    checks the printed alignment, the SAM line, the letter array, and the
    gap counts, and finally parses the SAM line back to confirm that the
    coordinates survive the round trip.
    """
    # NOTE(review): the multi-line expected texts below are reproduced as
    # they appear in this copy of the file, where runs of whitespace look
    # collapsed; confirm the column padding against the real output.
    target_seq = Seq("CCCCCC")
    query_seq = Seq("GGGGAAAAAAAACCCCCC")
    target = SeqRecord(target_seq, id="target")
    query = SeqRecord(query_seq, id="query")
    sequences = [target, query]
    coordinates = np.array([[0, 0, 6], [4, 12, 18]])
    alignment = Alignment(sequences, coordinates)
    self.assertEqual(
        str(alignment),
        """\
target 0 --------CCCCCC 6
0 --------|||||| 14
query 4 AAAAAAAACCCCCC 18
""",
    )
    line = alignment.format("sam")
    self.assertEqual(
        line, "query\t0\ttarget\t1\t255\t4S8I6M\t*\t0\t0\tGGGGAAAAAAAACCCCCC\t*\n"
    )
    fields = line.split()
    # The fourth SAM field is decremented here to get a zero-based position.
    pos = int(fields[3]) - 1
    self.assertEqual(pos, 0)
    cigar = fields[5]
    self.assertEqual(cigar, "4S8I6M")
    self.assertTrue(
        np.array_equal(
            np.array(alignment, "U"),
            # fmt: off
            np.array([['-', '-', '-', '-', '-', '-', '-', '-', 'C', 'C', 'C', 'C', 'C',
                       'C'],
                      ['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'C', 'C', 'C', 'C',
                       'C']], dtype='U')
            # fmt: on
        )
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (6 aligned letters; 6 identities; 0 mismatches; 8 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 6:
identities = 6,
mismatches = 0.
gaps = 8:
left_gaps = 8:
left_insertions = 8:
open_left_insertions = 1,
extend_left_insertions = 7;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 8)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 0)
    self.assertEqual(counts.internal_deletions, 0)
    self.assertEqual(counts.left_gaps, 8)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 0)
    self.assertEqual(counts.insertions, 8)
    self.assertEqual(counts.deletions, 0)
    self.assertEqual(counts.gaps, 8)
    self.assertEqual(counts.aligned, 6)
    self.assertEqual(counts.identities, 6)
    self.assertEqual(counts.mismatches, 0)
    # Round trip: parse the SAM line back.  The context manager closes the
    # stream even if parsing raises, and the parsed alignment gets its own
    # name so the alignment built above is not shadowed.
    with StringIO(line) as stream:
        alignments = Align.parse(stream, "sam")
        parsed = next(alignments)
    self.assertTrue(np.array_equal(parsed.coordinates, coordinates))
|
|
|
|
|
|
class TestAlign_strand(unittest.TestCase):
    """SAM formatting checks for an alignment and its reverse complement."""

    def test_format(self):
        """Test SAM formatting of an alignment on the opposite strand.

        The alignment is printed and formatted as SAM, then its coordinate
        columns are reversed and the SAM output is expected to stay the
        same.  The same pair of checks is repeated on the reverse
        complement of the alignment, which has its own expected SAM line.
        """
        # NOTE(review): the multi-line expected texts below are reproduced
        # as they appear in this copy of the file, where runs of whitespace
        # look collapsed; confirm the separators and padding against the
        # real output.
        target_steps = [0, 2, 2, 3, 4, 6, 6, 9, 10, 12, 15]
        query_steps = [11, 11, 9, 8, 8, 6, 5, 2, 2, 0, 0]
        aln = Alignment(
            ["AACAGCAGCGTGTCG", "CAGCTAGCGAA"],
            np.array([target_steps, query_steps]),
        )
        aln.score = 8
        expected_sam = """\
query 16 target 1 255 2D2I1M1D2M1I3M1D2M3D * 0 0 TTCGCTAGCTG * AS:i:8
"""
        printed = """\
target 0 AA--CAGC-AGCGTGTCG 15
0 ----|-||-|||-||--- 18
query 11 --TTC-GCTAGC-TG--- 0
"""
        self.assertEqual(str(aln), printed)
        self.assertEqual(format(aln, "sam"), expected_sam)

        # Reverse the column order of the coordinates; the SAM line is
        # expected to be unchanged.
        aln.coordinates = aln.coordinates[:, ::-1]
        printed = """\
target 15 CGACACGCT-GCTG--TT 0
0 ---||-|||-||-|---- 18
query 0 ---CA-GCTAGC-GAA-- 11
"""
        self.assertEqual(str(aln), printed)
        self.assertEqual(format(aln, "sam"), expected_sam)

        # Restore the original column order before reverse-complementing.
        aln.coordinates = aln.coordinates[:, ::-1]
        expected_sam = """\
query 16 target 1 255 3D2M1D3M1I2M1D1M2I2D * 0 0 CAGCTAGCGAA *
"""
        flipped = aln.reverse_complement()
        printed = """\
target 0 CGACACGCT-GCTG--TT 15
0 ---||-|||-||-|---- 18
query 11 ---CA-GCTAGC-GAA-- 0
"""
        self.assertEqual(str(flipped), printed)
        self.assertEqual(format(flipped, "sam"), expected_sam)

        # Same column reversal on the reverse-complemented alignment.
        flipped.coordinates = flipped.coordinates[:, ::-1]
        printed = """\
target 15 AA--CAGC-AGCGTGTCG 0
0 ----|-||-|||-||--- 18
query 0 --TTC-GCTAGC-TG--- 11
"""
        self.assertEqual(str(flipped), printed)
        self.assertEqual(format(flipped, "sam"), expected_sam)
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the tests in this module with a text runner at verbosity level 2.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
|