Files
biopython/Tests/test_Align_bigbed.py
mdehoon 66ad0a062e Extend the .counts method of an Alignment (#5011)
* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* halfway finished

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* replace precompiler #defines by inline functions

* update

* update

* add tests

* update

* update

* update

* update

* update

* update

* update

* update

* documentation

* update

* avoid stpcpy

* pointer printing

* compiler warning

* testing without codonalign

* testing without codonalign and pairwisealigner

* compiler warning

* adding codonalign and pairwisealigner back in

* remove inline from check_indices

* add inline to check_indices

* update

* add test line 12287 test_pairwise_aligner.py

* update

* update

* update

* update

* change dtype from int32 to "i"

* all done

* testing

* testing

* testing

* testing

* done

* done

* fix how pointers are printed on Windows

* update

* update

* update

* update

* fix id printing on pypy

* style change only

* Use Py_uintptr_t instead of uintptr_t

* fix memory leak

* remove double semicolon

* check if GitHub actions are now picking up Python version 3.13.5 without hardcoding it

---------

Co-authored-by: Michiel Jan Laurens de Hoon <mdehoon@Michiels-MacBook-Air.local>
Co-authored-by: Michiel de Hoon <mdehoon@tkx288.genome.gsc.riken.jp>
Co-authored-by: Michiel de Hoon <michiel.dehoon@riken.jp>
2025-07-11 14:40:07 +09:00

8016 lines
315 KiB
Python

# Copyright 2022 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for Align.bigbed module."""
import os
import sys
import tempfile
import unittest
from io import StringIO
from Bio import Align
from Bio import SeqIO
from Bio.Align import bigbed
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
try:
import numpy as np
except ImportError:
from Bio import MissingPythonDependencyError
raise MissingPythonDependencyError(
"Install numpy if you want to use Bio.Align.bigbed."
) from None
# Record whether the "--big" flag was given on the command line, and strip its
# first occurrence from sys.argv (presumably so that later argument handling
# does not see it -- confirm against the code that consumes sys.argv).
big = "--big" in sys.argv
if big:
    sys.argv.remove("--big")
class TestAlign_dna_rna(unittest.TestCase):
# The bigBed file dna_rna.bb was generated using the commands
# sort -k1,1 -k2,2n dna_rna.bed > dna_rna.sorted.bed
# twoBitInfo hg38.2bit hg38.chrom.sizes
# bedToBigBed dna_rna.sorted.bed hg38.chrom.sizes dna_rna.bb
path = "Blat/dna_rna.bb"
def setUp(self):
    """Load the DNA and RNA sequences used by the alignment checks.

    Each record in Blat/dna.fa has an id of the form "chr3:start-end"; its
    sequence is stored under its start coordinate, and the collected
    fragments are used to build a partially defined chromosome sequence
    (self.dna).  The records in Blat/rna.fa are stored by id in self.rna.
    """
    fragments = {}
    for fragment in SeqIO.parse("Blat/dna.fa", "fasta"):
        chromosome, interval = fragment.id.split(":")
        assert chromosome == "chr3"
        begin_text, finish_text = interval.split("-")
        begin = int(begin_text)
        finish = int(finish_text)
        bases = str(fragment.seq)
        # The coordinates in the record id must match the sequence length.
        assert len(bases) == finish - begin
        fragments[begin] = bases
    self.dna = Seq(fragments, length=198295559)  # hg38 chr3
    rna_records = SeqIO.parse("Blat/rna.fa", "fasta")
    self.rna = {entry.id: entry.seq for entry in rna_records}
def test_reading(self):
    """Test parsing dna_rna.bb.

    The file is checked three ways: directly on the object returned by
    Align.parse, on the result of calling iter() on that object, and on a
    parser used as a context manager.  After a ``with`` block has exited,
    whether or not the alignments were read inside it, accessing the
    ``_stream`` attribute of the parser must raise AttributeError.
    """
    # Use the class-level path, as test_writing does, instead of repeating
    # the file name in a local variable.
    alignments = Align.parse(self.path, "bigbed")
    self.check_alignments(alignments)
    alignments = iter(alignments)
    self.check_alignments(alignments)
    with Align.parse(self.path, "bigbed") as alignments:
        self.check_alignments(alignments)
    with self.assertRaises(AttributeError):
        alignments._stream
    with Align.parse(self.path, "bigbed") as alignments:
        pass
    with self.assertRaises(AttributeError):
        alignments._stream
def test_writing(self):
    """Test writing dna_rna.bb.

    The alignments parsed from self.path are written in bigBed format to a
    temporary file, which is then parsed again and run through the same
    checks as the original file.
    """
    parsed = Align.parse(self.path, "bigbed")
    with tempfile.TemporaryFile() as stream:
        Align.write(parsed, stream, "bigbed")
        # Flush and rewind so the freshly written data can be read back.
        stream.flush()
        stream.seek(0)
        self.check_alignments(Align.parse(stream, "bigbed"))
def check_alignments(self, alignments):
"""Check the declaration, targets and alignments read from dna_rna.bb.

The argument is the object returned by Align.parse (or an iterator over
it).  The method verifies the AutoSql declaration, the single target
(chr3), and then consumes exactly four alignments with next(), checking
score, shape, coordinates and alignment counts for each of them.  For the
two alignments whose query is not a "_modified" RNA, the sequences loaded
in setUp are attached and the substitution matrix is checked as well.
"""
# File-level metadata: field declaration and target list.
self.assertEqual(
str(alignments.declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Used as itemRgb as of 2004-11-22"
int blockCount; "Number of blocks"
int[blockCount] blockSizes; "Comma separated list of block sizes"
int[blockCount] chromStarts; "Start positions relative to chromStart"
)
""",
)
self.assertEqual(len(alignments.targets), 1)
self.assertEqual(alignments.targets[0].id, "chr3")
self.assertEqual(len(alignments.targets[0]), 198295559)
self.assertEqual(len(alignments), 4)
# First alignment: query NR_046654.1; target coordinates increase while
# query coordinates decrease.
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 1711))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "NR_046654.1")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42530895, 42530958, 42532020,
42532095, 42532563, 42532606],
[ 181, 118, 118,
43, 43, 0]])
# fmt: on
)
)
# Attach the sequences loaded in setUp so that the substitution matrix,
# identities and mismatches can be checked.
alignment.target.seq = self.dna
alignment.query.seq = self.rna[alignment.query.id]
self.assertTrue(
np.array_equal(
alignment.substitutions,
# fmt: off
np.array([[36., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 40., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 57., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 42., 0., 0., 0., 0.],
[ 2., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 1., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 3., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0.],
])
)
)
self.assertEqual(alignment.substitutions.alphabet, "ACGTacgt")
# The modified RNAs have gaps in their sequence. As this information is
# not stored in a BED file, we cannot calculate the substitution matrix.
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (181 aligned letters; 175 identities; 6 mismatches; 1530 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 181:
identities = 175,
mismatches = 6.
gaps = 1530:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 1530:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 1530:
open_internal_deletions = 2,
extend_internal_deletions = 1528;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 1530)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 1530)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 1530)
self.assertEqual(counts.gaps, 1530)
self.assertEqual(counts.aligned, 181)
self.assertEqual(counts.identities, 175)
self.assertEqual(counts.mismatches, 6)
# Second alignment: query NR_046654.1_modified.  No sequences are attached
# here, and the expected counts report 0 identities and 0 mismatches.
alignment = next(alignments)
self.assertEqual(alignment.score, 978)
self.assertEqual(alignment.shape, (2, 1711))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "NR_046654.1_modified")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42530895, 42530922, 42530958, 42532020, 42532037,
42532039, 42532095, 42532563, 42532606],
[ 179, 152, 116, 116, 99,
99, 43, 43, 0]])
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (179 aligned letters; 0 identities; 0 mismatches; 1532 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 179:
identities = 0,
mismatches = 0.
gaps = 1532:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 1532:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 1532:
open_internal_deletions = 3,
extend_internal_deletions = 1529;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 1532)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 1532)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 1532)
self.assertEqual(counts.gaps, 1532)
self.assertEqual(counts.aligned, 179)
# Third alignment: query NR_111921.1; both target and query coordinates
# increase.
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 5407))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "NR_111921.1")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48663767, 48663813, 48665640,
48665722, 48669098, 48669174],
[ 0, 46, 46,
128, 128, 204]])
# fmt: on
)
)
# Attach the sequences loaded in setUp before checking substitutions.
alignment.target.seq = self.dna
alignment.query.seq = self.rna[alignment.query.id]
self.assertTrue(
np.array_equal(
alignment.substitutions,
# fmt: off
np.array([[53., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 35., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 50., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 27., 0., 0., 0., 0.],
[ 9., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 7., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 16., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 7., 0., 0., 0., 0.],
])
)
)
self.assertEqual(alignment.substitutions.alphabet, "ACGTacgt")
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (204 aligned letters; 165 identities; 39 mismatches; 5203 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 204:
identities = 165,
mismatches = 39.
gaps = 5203:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 5203:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 5203:
open_internal_deletions = 2,
extend_internal_deletions = 5201;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 5203)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 5203)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 5203)
self.assertEqual(counts.gaps, 5203)
self.assertEqual(counts.aligned, 204)
self.assertEqual(counts.identities, 165)
self.assertEqual(counts.mismatches, 39)
# Fourth alignment: query NR_111921.1_modified.  No sequences are attached
# here, and the expected counts report 0 identities and 0 mismatches.
alignment = next(alignments)
self.assertEqual(alignment.score, 972)
self.assertEqual(alignment.shape, (2, 5407))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "NR_111921.1_modified")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48663767, 48663795, 48663796, 48663813, 48665640,
48665716, 48665722, 48669098, 48669174],
[ 0, 28, 28, 45, 45,
121, 127, 127, 203]])
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (203 aligned letters; 0 identities; 0 mismatches; 5204 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 203:
identities = 0,
mismatches = 0.
gaps = 5204:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 5204:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 5204:
open_internal_deletions = 3,
extend_internal_deletions = 5201;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 5204)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 5204)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 5204)
self.assertEqual(counts.gaps, 5204)
self.assertEqual(counts.aligned, 203)
# The file holds exactly four alignments; a fifth next() must fail.
self.assertRaises(StopIteration, next, alignments)
class TestAlign_dna(unittest.TestCase):
# The bigBed file psl_34_001.bb was generated using the commands
# sort -k1,1 -k2,2n psl_34_001.bed > psl_34_001.sorted.bed
# twoBitInfo hg19.2bit hg19.chrom.sizes
# bedToBigBed psl_34_001.sorted.bed hg19.chrom.sizes psl_34_001.bb
# The bigBed file psl_34_003.bb was generated using the commands
# sort -k1,1 -k2,2n psl_34_003.bed > psl_34_003.sorted.bed
# twoBitInfo hg19.2bit hg19.chrom.sizes
# bedToBigBed psl_34_003.sorted.bed hg19.chrom.sizes psl_34_003.bb
# The bigBed file psl_34_004.bb was generated using the commands
# sort -k1,1 -k2,2n psl_34_004.bed > psl_34_004.sorted.bed
# twoBitInfo hg19.2bit hg19.chrom.sizes
# bedToBigBed psl_34_004.sorted.bed hg19.chrom.sizes psl_34_004.bb
# The bigBed file psl_34_005.bb was generated using the commands
# sort -k1,1 -k2,2n psl_34_005.bed > psl_34_005.sorted.bed
# twoBitInfo hg19.2bit hg19.chrom.sizes
# bedToBigBed psl_34_005.sorted.bed hg19.chrom.sizes psl_34_005.bb
def check_alignments_psl_34_001(self, alignments):
self.assertEqual(
str(alignments.declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Used as itemRgb as of 2004-11-22"
int blockCount; "Number of blocks"
int[blockCount] blockSizes; "Comma separated list of block sizes"
int[blockCount] chromStarts; "Start positions relative to chromStart"
)
""",
)
self.assertEqual(len(alignments.targets), 10)
self.assertEqual(alignments.targets[0].id, "chr1")
self.assertEqual(len(alignments.targets[0]), 249250621)
self.assertEqual(alignments.targets[1].id, "chr10")
self.assertEqual(len(alignments.targets[1]), 135534747)
self.assertEqual(alignments.targets[2].id, "chr13")
self.assertEqual(len(alignments.targets[2]), 115169878)
self.assertEqual(alignments.targets[3].id, "chr18")
self.assertEqual(len(alignments.targets[3]), 78077248)
self.assertEqual(alignments.targets[4].id, "chr19")
self.assertEqual(len(alignments.targets[4]), 59128983)
self.assertEqual(alignments.targets[5].id, "chr2")
self.assertEqual(len(alignments.targets[5]), 243199373)
self.assertEqual(alignments.targets[6].id, "chr22")
self.assertEqual(len(alignments.targets[6]), 51304566)
self.assertEqual(alignments.targets[7].id, "chr4")
self.assertEqual(len(alignments.targets[7]), 191154276)
self.assertEqual(alignments.targets[8].id, "chr8")
self.assertEqual(len(alignments.targets[8]), 146364022)
self.assertEqual(alignments.targets[9].id, "chr9")
self.assertEqual(len(alignments.targets[9]), 141213431)
self.assertEqual(len(alignments), 22)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 50))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[1207056, 1207106],
[ 0, 50]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (50 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 50:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 50)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 33))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[10271783, 10271816],
[ 0, 33]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (33 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 33:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 33)
alignment = next(alignments)
self.assertEqual(alignment.score, 946)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[39368490, 39368526],
[ 36, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 824)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61700837, 61700871],
[ 0, 34]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.score, 942)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[220325687, 220325721],
[ 34, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.score, 834)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[99388555, 99388591],
[ 36, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 920)
self.assertEqual(alignment.shape, (2, 25))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[112178171, 112178196],
[ 25, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (25 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 25:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 25)
alignment = next(alignments)
self.assertEqual(alignment.score, 912)
self.assertEqual(alignment.shape, (2, 51))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr13")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[52759147, 52759154, 52759160, 52759198],
[ 0, 7, 7, 45]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (45 aligned letters; 0 identities; 0 mismatches; 6 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 45:
identities = 0,
mismatches = 0.
gaps = 6:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 6:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 6:
open_internal_deletions = 1,
extend_internal_deletions = 5;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 6)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 6)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 6)
self.assertEqual(counts.gaps, 6)
self.assertEqual(counts.aligned, 45)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[23891310, 23891349],
[ 0, 39]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.score, 930)
self.assertEqual(alignment.shape, (2, 28))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[43252217, 43252245],
[ 0, 28]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.score, 848)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[553742, 553781],
[ 39, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.score, 890)
self.assertEqual(alignment.shape, (2, 170))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[35483340, 35483365, 35483499, 35483510],
[ 0, 25, 25, 36]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 134 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 134:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 134:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 134:
open_internal_deletions = 1,
extend_internal_deletions = 133;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 134)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 134)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 134)
self.assertEqual(counts.gaps, 134)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[54017130, 54017169],
[ 39, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 17))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[53575980, 53575997],
[ 17, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (17 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 17:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 17)
alignment = next(alignments)
self.assertEqual(alignment.score, 946)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[120641740, 120641776],
[ 36, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 682)
self.assertEqual(alignment.shape, (2, 44))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[183925984, 183925990, 183926028],
[ 0, 6, 44]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (44 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 44:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 44)
alignment = next(alignments)
self.assertEqual(alignment.score, 834)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42144400, 42144436],
[ 0, 36]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 892)
self.assertEqual(alignment.shape, (2, 37))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48997405, 48997442],
[ 37, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (37 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 37:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 37)
alignment = next(alignments)
self.assertEqual(alignment.score, 572)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[37558157, 37558167, 37558173, 37558191],
[ 28, 18, 18, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 6 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 6:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 6:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 6:
open_internal_deletions = 1,
extend_internal_deletions = 5;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 6)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 6)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 6)
self.assertEqual(counts.gaps, 6)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 16))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61646095, 61646111],
[ 0, 16]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (16 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 16:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 16)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 41))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr8")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[95160479, 95160520],
[ 0, 41]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.score, 854)
self.assertEqual(alignment.shape, (2, 41))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr9")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[85737865, 85737906],
[ 0, 41]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
self.assertRaises(StopIteration, next, alignments)
def test_reading_psl_34_001(self):
    """Test reading psl_34_001.bb."""
    # Parse the bigBed file and hand the resulting alignments to the
    # shared checker for this data set.
    parsed = Align.parse("Blat/psl_34_001.bb", "bigbed")
    self.check_alignments_psl_34_001(parsed)
def test_writing_psl_34_001(self):
    """Test writing psl_34_001.bb."""
    # Round trip: parse the reference file, write it to a temporary file
    # in bigBed format, then parse that file again and run the same checks
    # that are used for reading.
    original = Align.parse("Blat/psl_34_001.bb", "bigbed")
    with tempfile.TemporaryFile() as stream:
        Align.write(original, stream, "bigbed")
        stream.flush()
        # Rewind so that parsing starts at the beginning of the written data.
        stream.seek(0)
        reparsed = Align.parse(stream, "bigbed")
        # The check runs while the temporary file is still open.
        self.check_alignments_psl_34_001(reparsed)
def check_alignments_psl_34_003(self, alignments):
    """Check that alignments holds the three records expected for psl_34_003.bb.

    The reading and writing tests for Blat/psl_34_003.bb call this with the
    object returned by Align.parse(..., "bigbed"). The object is consumed
    here: each record is fetched with next(), and after the third record a
    further next() must raise StopIteration.
    """
    # The declaration must be the 12-field BED table definition below.
    self.assertEqual(
        str(alignments.declaration),
        """\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Used as itemRgb as of 2004-11-22"
int blockCount; "Number of blocks"
int[blockCount] blockSizes; "Comma separated list of block sizes"
int[blockCount] chromStarts; "Start positions relative to chromStart"
)
""",
    )
    # Three targets (chr1, chr2, chr4), each with a known length.
    self.assertEqual(len(alignments.targets), 3)
    self.assertEqual(alignments.targets[0].id, "chr1")
    self.assertEqual(len(alignments.targets[0]), 249250621)
    self.assertEqual(alignments.targets[1].id, "chr2")
    self.assertEqual(len(alignments.targets[1]), 243199373)
    self.assertEqual(alignments.targets[2].id, "chr4")
    self.assertEqual(len(alignments.targets[2]), 191154276)
    # Exactly three alignments are expected in total.
    self.assertEqual(len(alignments), 3)
    # Alignment 1: chr1 against hg18_dna, one ungapped block of 33 columns.
    alignment = next(alignments)
    self.assertEqual(alignment.score, 1000)
    self.assertEqual(alignment.shape, (2, 33))
    # Both target and query coordinates ascend.
    self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
    self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
    self.assertEqual(len(alignment), 2)
    # sequences[0] / sequences[1] must be the very same objects as target / query.
    self.assertIs(alignment.sequences[0], alignment.target)
    self.assertIs(alignment.sequences[1], alignment.query)
    self.assertEqual(alignment.target.id, "chr1")
    self.assertEqual(alignment.query.id, "hg18_dna")
    self.assertTrue(
        np.array_equal(
            alignment.coordinates,
            # fmt: off
            np.array([[10271783, 10271816],
                      [ 0, 33]]),
            # fmt: on
        )
    )
    # counts() reports 33 aligned letters but 0 identities and 0 mismatches
    # (presumably because no sequence contents are available - TODO confirm).
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (33 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 33:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    # The individual count attributes must agree with the printed summary.
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 0)
    self.assertEqual(counts.internal_deletions, 0)
    self.assertEqual(counts.left_gaps, 0)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 0)
    self.assertEqual(counts.insertions, 0)
    self.assertEqual(counts.deletions, 0)
    self.assertEqual(counts.gaps, 0)
    self.assertEqual(counts.aligned, 33)
    # Alignment 2: chr2 against hg18_dna, one ungapped block of 17 columns.
    alignment = next(alignments)
    self.assertEqual(alignment.score, 1000)
    self.assertEqual(alignment.shape, (2, 17))
    # Target coordinates ascend while query coordinates descend (17 -> 0).
    self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
    self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
    self.assertEqual(len(alignment), 2)
    self.assertIs(alignment.sequences[0], alignment.target)
    self.assertIs(alignment.sequences[1], alignment.query)
    self.assertEqual(alignment.target.id, "chr2")
    self.assertEqual(alignment.query.id, "hg18_dna")
    self.assertTrue(
        np.array_equal(
            alignment.coordinates,
            # fmt: off
            np.array([[53575980, 53575997],
                      [ 17, 0]]),
            # fmt: on
        )
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (17 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 17:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 0)
    self.assertEqual(counts.internal_deletions, 0)
    self.assertEqual(counts.left_gaps, 0)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 0)
    self.assertEqual(counts.insertions, 0)
    self.assertEqual(counts.deletions, 0)
    self.assertEqual(counts.gaps, 0)
    self.assertEqual(counts.aligned, 17)
    # Alignment 3: chr4 against hg18_dna, one ungapped block of 16 columns.
    alignment = next(alignments)
    self.assertEqual(alignment.score, 1000)
    self.assertEqual(alignment.shape, (2, 16))
    # Both target and query coordinates ascend.
    self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
    self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
    self.assertEqual(len(alignment), 2)
    self.assertIs(alignment.sequences[0], alignment.target)
    self.assertIs(alignment.sequences[1], alignment.query)
    self.assertEqual(alignment.target.id, "chr4")
    self.assertEqual(alignment.query.id, "hg18_dna")
    self.assertTrue(
        np.array_equal(
            alignment.coordinates,
            # fmt: off
            np.array([[61646095, 61646111],
                      [ 0, 16]]),
            # fmt: on
        )
    )
    counts = alignment.counts()
    self.assertEqual(
        repr(counts),
        "<AlignmentCounts object (16 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
        % id(counts),
    )
    self.assertEqual(
        str(counts),
        """\
AlignmentCounts object with
aligned = 16:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
    )
    self.assertEqual(counts.left_insertions, 0)
    self.assertEqual(counts.left_deletions, 0)
    self.assertEqual(counts.right_insertions, 0)
    self.assertEqual(counts.right_deletions, 0)
    self.assertEqual(counts.internal_insertions, 0)
    self.assertEqual(counts.internal_deletions, 0)
    self.assertEqual(counts.left_gaps, 0)
    self.assertEqual(counts.right_gaps, 0)
    self.assertEqual(counts.internal_gaps, 0)
    self.assertEqual(counts.insertions, 0)
    self.assertEqual(counts.deletions, 0)
    self.assertEqual(counts.gaps, 0)
    self.assertEqual(counts.aligned, 16)
    # No fourth record: the iterator must now be exhausted.
    self.assertRaises(StopIteration, next, alignments)
def test_reading_psl_34_003(self):
    """Parse Blat/psl_34_003.bb as bigBed and verify the alignments it yields."""
    parsed = Align.parse("Blat/psl_34_003.bb", "bigbed")
    self.check_alignments_psl_34_003(parsed)
def test_writing_psl_34_003(self):
    """Write psl_34_003.bb back out as bigBed and verify the re-parsed result."""
    source = Align.parse("Blat/psl_34_003.bb", "bigbed")
    with tempfile.TemporaryFile() as stream:
        Align.write(source, stream, "bigbed")
        # Flush the written data and rewind so the same handle can be parsed.
        stream.flush()
        stream.seek(0)
        # Verify while the temporary file is still open.
        roundtripped = Align.parse(stream, "bigbed")
        self.check_alignments_psl_34_003(roundtripped)
def check_alignments_psl_34_004(self, alignments):
self.assertEqual(
str(alignments.declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Used as itemRgb as of 2004-11-22"
int blockCount; "Number of blocks"
int[blockCount] blockSizes; "Comma separated list of block sizes"
int[blockCount] chromStarts; "Start positions relative to chromStart"
)
""",
)
self.assertEqual(len(alignments.targets), 10)
self.assertEqual(alignments.targets[0].id, "chr1")
self.assertEqual(len(alignments.targets[0]), 249250621)
self.assertEqual(alignments.targets[1].id, "chr10")
self.assertEqual(len(alignments.targets[1]), 135534747)
self.assertEqual(alignments.targets[2].id, "chr13")
self.assertEqual(len(alignments.targets[2]), 115169878)
self.assertEqual(alignments.targets[3].id, "chr18")
self.assertEqual(len(alignments.targets[3]), 78077248)
self.assertEqual(alignments.targets[4].id, "chr19")
self.assertEqual(len(alignments.targets[4]), 59128983)
self.assertEqual(alignments.targets[5].id, "chr2")
self.assertEqual(len(alignments.targets[5]), 243199373)
self.assertEqual(alignments.targets[6].id, "chr22")
self.assertEqual(len(alignments.targets[6]), 51304566)
self.assertEqual(alignments.targets[7].id, "chr4")
self.assertEqual(len(alignments.targets[7]), 191154276)
self.assertEqual(alignments.targets[8].id, "chr8")
self.assertEqual(len(alignments.targets[8]), 146364022)
self.assertEqual(alignments.targets[9].id, "chr9")
self.assertEqual(len(alignments.targets[9]), 141213431)
self.assertEqual(len(alignments), 19)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 50))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[1207056, 1207106],
[ 0, 50]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (50 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 50:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 50)
alignment = next(alignments)
self.assertEqual(alignment.score, 946)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[39368490, 39368526],
[ 36, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 824)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61700837, 61700871],
[ 0, 34]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.score, 942)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[220325687, 220325721],
[ 34, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.score, 834)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[99388555, 99388591],
[ 36, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 920)
self.assertEqual(alignment.shape, (2, 25))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[112178171, 112178196],
[ 25, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (25 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 25:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 25)
alignment = next(alignments)
self.assertEqual(alignment.score, 912)
self.assertEqual(alignment.shape, (2, 51))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr13")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[52759147, 52759154, 52759160, 52759198],
[ 0, 7, 7, 45]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (45 aligned letters; 0 identities; 0 mismatches; 6 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 45:
identities = 0,
mismatches = 0.
gaps = 6:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 6:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 6:
open_internal_deletions = 1,
extend_internal_deletions = 5;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 6)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 6)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 6)
self.assertEqual(counts.gaps, 6)
self.assertEqual(counts.aligned, 45)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[23891310, 23891349],
[ 0, 39]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.score, 930)
self.assertEqual(alignment.shape, (2, 28))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[43252217, 43252245],
[ 0, 28]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.score, 848)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[553742, 553781],
[ 39, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.score, 890)
self.assertEqual(alignment.shape, (2, 170))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[35483340, 35483365, 35483499, 35483510],
[ 0, 25, 25, 36]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 134 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 134:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 134:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 134:
open_internal_deletions = 1,
extend_internal_deletions = 133;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 134)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 134)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 134)
self.assertEqual(counts.gaps, 134)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[54017130, 54017169],
[ 39, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.score, 946)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[120641740, 120641776],
[ 36, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 682)
self.assertEqual(alignment.shape, (2, 44))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[183925984, 183925990, 183926028],
[ 0, 6, 44]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (44 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 44:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 44)
alignment = next(alignments)
self.assertEqual(alignment.score, 834)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42144400, 42144436],
[ 0, 36]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 892)
self.assertEqual(alignment.shape, (2, 37))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48997405, 48997442],
[ 37, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (37 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 37:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 37)
alignment = next(alignments)
self.assertEqual(alignment.score, 572)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[37558157, 37558167, 37558173, 37558191],
[ 28, 18, 18, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 6 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 6:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 6:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 6:
open_internal_deletions = 1,
extend_internal_deletions = 5;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 6)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 6)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 6)
self.assertEqual(counts.gaps, 6)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 41))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr8")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[95160479, 95160520],
[ 0, 41]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.score, 854)
self.assertEqual(alignment.shape, (2, 41))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr9")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[85737865, 85737906],
[ 0, 41]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
self.assertRaises(StopIteration, next, alignments)
def test_reading_psl_34_004(self):
    """Test reading psl_34_004.bb.

    Parses the bigBed fixture and hands the resulting alignments to
    check_alignments_psl_34_004, which performs all the assertions.
    """
    # The parser is passed the file path directly; the checker consumes it.
    parsed = Align.parse("Blat/psl_34_004.bb", "bigbed")
    self.check_alignments_psl_34_004(parsed)
def test_writing_psl_34_004(self):
    """Test writing psl_34_004.bb.

    Round trip: the fixture is parsed, written back out in bigBed format
    to a temporary file, and the written copy is parsed again and run
    through the same checks as the reading test.
    """
    source_path = "Blat/psl_34_004.bb"
    with tempfile.TemporaryFile() as stream:
        Align.write(Align.parse(source_path, "bigbed"), stream, "bigbed")
        # Push the written data out and rewind so that the parser reads
        # the file from its beginning.
        stream.flush()
        stream.seek(0)
        reloaded = Align.parse(stream, "bigbed")
        self.check_alignments_psl_34_004(reloaded)
def check_alignments_psl_34_005(self, alignments):
self.assertEqual(
str(alignments.declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Used as itemRgb as of 2004-11-22"
int blockCount; "Number of blocks"
int[blockCount] blockSizes; "Comma separated list of block sizes"
int[blockCount] chromStarts; "Start positions relative to chromStart"
)
""",
)
self.assertEqual(len(alignments.targets), 10)
self.assertEqual(alignments.targets[0].id, "chr1")
self.assertEqual(len(alignments.targets[0]), 249250621)
self.assertEqual(alignments.targets[1].id, "chr10")
self.assertEqual(len(alignments.targets[1]), 135534747)
self.assertEqual(alignments.targets[2].id, "chr13")
self.assertEqual(len(alignments.targets[2]), 115169878)
self.assertEqual(alignments.targets[3].id, "chr18")
self.assertEqual(len(alignments.targets[3]), 78077248)
self.assertEqual(alignments.targets[4].id, "chr19")
self.assertEqual(len(alignments.targets[4]), 59128983)
self.assertEqual(alignments.targets[5].id, "chr2")
self.assertEqual(len(alignments.targets[5]), 243199373)
self.assertEqual(alignments.targets[6].id, "chr22")
self.assertEqual(len(alignments.targets[6]), 51304566)
self.assertEqual(alignments.targets[7].id, "chr4")
self.assertEqual(len(alignments.targets[7]), 191154276)
self.assertEqual(alignments.targets[8].id, "chr8")
self.assertEqual(len(alignments.targets[8]), 146364022)
self.assertEqual(alignments.targets[9].id, "chr9")
self.assertEqual(len(alignments.targets[9]), 141213431)
self.assertEqual(len(alignments), 22)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 50))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[1207056, 1207106],
[ 0, 50]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (50 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 50:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 50)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 33))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[10271783, 10271816],
[ 0, 33]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (33 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 33:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 33)
alignment = next(alignments)
self.assertEqual(alignment.score, 946)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[39368490, 39368526],
[ 36, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 824)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61700837, 61700871],
[ 0, 34]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.score, 942)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[220325687, 220325721],
[ 34, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (34 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 34:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 34)
alignment = next(alignments)
self.assertEqual(alignment.score, 834)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[99388555, 99388591],
[ 36, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 920)
self.assertEqual(alignment.shape, (2, 25))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr10")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[112178171, 112178196],
[ 25, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (25 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 25:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 25)
alignment = next(alignments)
self.assertEqual(alignment.score, 912)
self.assertEqual(alignment.shape, (2, 51))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr13")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[52759147, 52759154, 52759160, 52759198],
[ 0, 7, 7, 45]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (45 aligned letters; 0 identities; 0 mismatches; 6 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 45:
identities = 0,
mismatches = 0.
gaps = 6:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 6:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 6:
open_internal_deletions = 1,
extend_internal_deletions = 5;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 6)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 6)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 6)
self.assertEqual(counts.gaps, 6)
self.assertEqual(counts.aligned, 45)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[23891310, 23891349],
[ 0, 39]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.score, 930)
self.assertEqual(alignment.shape, (2, 28))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr18")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[43252217, 43252245],
[ 0, 28]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.score, 848)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[553742, 553781],
[ 39, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.score, 890)
self.assertEqual(alignment.shape, (2, 170))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[35483340, 35483365, 35483499, 35483510],
[ 0, 25, 25, 36]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 134 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 134:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 134:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 134:
open_internal_deletions = 1,
extend_internal_deletions = 133;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 134)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 134)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 134)
self.assertEqual(counts.gaps, 134)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 39))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr19")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[54017130, 54017169],
[ 39, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (39 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 39:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 39)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 17))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[53575980, 53575997],
[ 17, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (17 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 17:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 17)
alignment = next(alignments)
self.assertEqual(alignment.score, 946)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[120641740, 120641776],
[ 36, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 682)
self.assertEqual(alignment.shape, (2, 44))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[183925984, 183925990, 183926028],
[ 0, 6, 44]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (44 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 44:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 44)
alignment = next(alignments)
self.assertEqual(alignment.score, 834)
self.assertEqual(alignment.shape, (2, 36))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[42144400, 42144436],
[ 0, 36]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (36 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 36:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 36)
alignment = next(alignments)
self.assertEqual(alignment.score, 892)
self.assertEqual(alignment.shape, (2, 37))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr22")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[48997405, 48997442],
[ 37, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (37 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 37:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 37)
alignment = next(alignments)
self.assertEqual(alignment.score, 572)
self.assertEqual(alignment.shape, (2, 34))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[37558157, 37558167, 37558173, 37558191],
[ 28, 18, 18, 0]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (28 aligned letters; 0 identities; 0 mismatches; 6 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 28:
identities = 0,
mismatches = 0.
gaps = 6:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 6:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 6:
open_internal_deletions = 1,
extend_internal_deletions = 5;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 6)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 6)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 6)
self.assertEqual(counts.gaps, 6)
self.assertEqual(counts.aligned, 28)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 16))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr4")
self.assertEqual(alignment.query.id, "hg18_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[61646095, 61646111],
[ 0, 16]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (16 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 16:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 16)
alignment = next(alignments)
self.assertEqual(alignment.score, 1000)
self.assertEqual(alignment.shape, (2, 41))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr8")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[95160479, 95160520],
[ 0, 41]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
alignment = next(alignments)
self.assertEqual(alignment.score, 854)
self.assertEqual(alignment.shape, (2, 41))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr9")
self.assertEqual(alignment.query.id, "hg19_dna")
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[85737865, 85737906],
[ 0, 41]]),
# fmt: on
)
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (41 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 41:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 41)
self.assertRaises(StopIteration, next, alignments)
def test_reading_psl_34_005(self):
    """Parse psl_34_005.bb as bigBed and verify the resulting alignments."""
    # The iterator is handed straight to the shared checker, which consumes it.
    self.check_alignments_psl_34_005(Align.parse("Blat/psl_34_005.bb", "bigbed"))
def test_writing_psl_34_005(self):
    """Write psl_34_005.bb to a temporary file, re-read it, and verify it."""
    source_alignments = Align.parse("Blat/psl_34_005.bb", "bigbed")
    with tempfile.TemporaryFile() as stream:
        Align.write(source_alignments, stream, "bigbed")
        # Push any buffered bytes out and rewind so the file can be re-read
        # from the start.
        stream.flush()
        stream.seek(0)
        # The check runs while the temporary file is still open, because the
        # re-parsed alignments are read from this stream.
        reparsed_alignments = Align.parse(stream, "bigbed")
        self.check_alignments_psl_34_005(reparsed_alignments)
class TestAlign_dnax_prot(unittest.TestCase):
    # The bigBed file psl_35_001.bb was generated using the commands
    # sort -k1,1 -k2,2n psl_35_001.bed > psl_35_001.sorted.bed
    # twoBitInfo hg38.2bit hg38.chrom.sizes
    # bedToBigBed psl_35_001.sorted.bed hg38.chrom.sizes psl_35_001.bb

    path = "Blat/psl_35_001.bb"

    # Expected values for each alignment, in the order the parser yields them:
    # (score, target id, coordinates, aligned letters, internal deletions).
    _expected_alignments = (
        (
            986,
            "chr13",
            [[75549820, 75549865, 75567225, 75567312], [0, 45, 45, 132]],
            132,
            17360,
        ),
        (1000, "chr13", [[75560749, 75560881], [0, 132]], 132, 0),
        (1000, "chr13", [[75566694, 75566850], [0, 156]], 156, 0),
        (1000, "chr13", [[75569459, 75569507], [0, 48]], 48, 0),
        (1000, "chr13", [[75594914, 75594989], [0, 75]], 75, 0),
        (
            1000,
            "chr13",
            [[75604767, 75604827, 75605728, 75605809], [0, 60, 60, 141]],
            141,
            901,
        ),
        (
            166,
            "chr4",
            [[41257605, 41257731, 41263227, 41263290], [0, 126, 126, 189]],
            189,
            5496,
        ),
        (530, "chr4", [[41260685, 41260787], [0, 102]], 102, 0),
    )

    def _check_counts(self, counts, aligned, deletions):
        """Verify a counts object whose only gaps are internal deletions.

        ``aligned`` is the expected number of aligned letters and
        ``deletions`` the expected total of internal deletions. Identities,
        mismatches and every other gap category are expected to be 0.
        """
        # In the expected output a non-zero deletion total is split into a
        # single opening and the remainder as extensions.
        opened = 1 if deletions else 0
        extended = deletions - opened
        self.assertEqual(
            repr(counts),
            "<AlignmentCounts object (%d aligned letters; 0 identities; 0 mismatches; %d gaps) at 0x%x>"
            % (aligned, deletions, id(counts)),
        )
        self.assertEqual(
            str(counts),
            """\
AlignmentCounts object with
aligned = {aligned}:
identities = 0,
mismatches = 0.
gaps = {gaps}:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = {gaps}:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = {gaps}:
open_internal_deletions = {opened},
extend_internal_deletions = {extended};
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""".format(aligned=aligned, gaps=deletions, opened=opened, extended=extended),
        )
        # Attribute checks, in the same order as the printed summary is verified.
        for name, value in (
            ("left_insertions", 0),
            ("left_deletions", 0),
            ("right_insertions", 0),
            ("right_deletions", 0),
            ("internal_insertions", 0),
            ("internal_deletions", deletions),
            ("left_gaps", 0),
            ("right_gaps", 0),
            ("internal_gaps", deletions),
            ("insertions", 0),
            ("deletions", deletions),
            ("gaps", deletions),
            ("aligned", aligned),
        ):
            self.assertEqual(getattr(counts, name), value)

    def check_alignments(self, alignments):
        """Check the declaration, targets and all alignments of psl_35_001.bb.

        ``alignments`` is consumed: each expected alignment is pulled with
        next(), and the iterator must be exhausted afterwards.
        """
        self.assertEqual(
            str(alignments.declaration),
            """\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Used as itemRgb as of 2004-11-22"
int blockCount; "Number of blocks"
int[blockCount] blockSizes; "Comma separated list of block sizes"
int[blockCount] chromStarts; "Start positions relative to chromStart"
)
""",
        )
        self.assertEqual(len(alignments.targets), 2)
        self.assertEqual(alignments.targets[0].id, "chr13")
        self.assertEqual(len(alignments.targets[0]), 114364328)
        self.assertEqual(alignments.targets[1].id, "chr4")
        self.assertEqual(len(alignments.targets[1]), 190214555)
        self.assertEqual(len(alignments), 8)
        for row in self._expected_alignments:
            score, target_id, coordinates, aligned, deletions = row
            alignment = next(alignments)
            self.assertEqual(alignment.score, score)
            # Both target and query coordinates must be increasing.
            self.assertLess(
                alignment.coordinates[0, 0], alignment.coordinates[0, -1]
            )
            self.assertLess(
                alignment.coordinates[1, 0], alignment.coordinates[1, -1]
            )
            self.assertEqual(len(alignment), 2)
            self.assertIs(alignment.sequences[0], alignment.target)
            self.assertIs(alignment.sequences[1], alignment.query)
            self.assertEqual(alignment.target.id, target_id)
            self.assertEqual(alignment.query.id, "CAG33136.1")
            self.assertTrue(
                np.array_equal(alignment.coordinates, np.array(coordinates))
            )
            self._check_counts(alignment.counts(), aligned, deletions)
        self.assertRaises(StopIteration, next, alignments)

    def test_reading_psl_35_001(self):
        """Test parsing psl_35_001.bb."""
        self.check_alignments(Align.parse(self.path, "bigbed"))

    def test_writing_psl_35_001(self):
        """Test writing psl_35_001.bb.

        The alignments are written to a temporary bigBed file, parsed back
        from it, and verified with check_alignments.
        """
        source = Align.parse(self.path, "bigbed")
        with tempfile.TemporaryFile() as stream:
            Align.write(source, stream, "bigbed")
            # Flush and rewind so the parser reads the file from its start.
            stream.flush()
            stream.seek(0)
            roundtrip = Align.parse(stream, "bigbed")
            self.check_alignments(roundtrip)
class TestAlign_bed12(unittest.TestCase):
# The bigBed files were generated using the commands
# twoBitInfo hg19.2bit hg19.chrom.sizes
# bedToBigBed bed3.bed hg19.chrom.sizes bed3.bb
# bedToBigBed bed4.bed hg19.chrom.sizes bed4.bb
# bedToBigBed bed5.bed hg19.chrom.sizes bed5.bb
# bedToBigBed bed6.bed hg19.chrom.sizes bed6.bb
# bedToBigBed bed7.bed hg19.chrom.sizes bed7.bb
# bedToBigBed bed8.bed hg19.chrom.sizes bed8.bb
# bedToBigBed bed9.bed hg19.chrom.sizes bed9.bb
# bedToBigBed bed12.bed hg19.chrom.sizes bed12.bb
def check_autosql(self, declaration, bedN, msg):
if bedN == 3:
self.assertEqual(
str(declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
)
""",
msg=msg,
)
elif bedN == 4:
self.assertEqual(
str(declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
)
""",
msg=msg,
)
elif bedN == 5:
self.assertEqual(
str(declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
)
""",
msg=msg,
)
elif bedN == 6:
self.assertEqual(
str(declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
)
""",
msg=msg,
)
elif bedN == 7:
self.assertEqual(
str(declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
uint thickStart; "Start of where display should be thick (start codon)"
)
""",
msg=msg,
)
elif bedN == 8:
self.assertEqual(
str(declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
)
""",
msg=msg,
)
elif bedN == 9:
self.assertEqual(
str(declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Used as itemRgb as of 2004-11-22"
)
""",
msg=msg,
)
elif bedN == 12:
self.assertEqual(
str(declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Used as itemRgb as of 2004-11-22"
int blockCount; "Number of blocks"
int[blockCount] blockSizes; "Comma separated list of block sizes"
int[blockCount] chromStarts; "Start positions relative to chromStart"
)
""",
msg=msg,
)
def check_alignments(self, alignments, bedN, msg):
self.assertEqual(len(alignments), 2)
alignment = next(alignments)
if bedN >= 5:
self.assertEqual(alignment.score, 960, msg=msg)
self.assertEqual(alignment.shape, (2, 4000), msg=msg)
self.assertLess(
alignment.coordinates[0, 0], alignment.coordinates[0, -1], msg=msg
)
self.assertLess(
alignment.coordinates[1, 0], alignment.coordinates[1, -1], msg=msg
)
self.assertEqual(len(alignment), 2, msg=msg)
self.assertIs(alignment.sequences[0], alignment.target, msg=msg)
self.assertIs(alignment.sequences[1], alignment.query, msg=msg)
self.assertEqual(alignment.target.id, "chr22", msg=msg)
if bedN >= 4:
self.assertEqual(alignment.query.id, "mRNA1", msg=msg)
else:
self.assertIsNone(alignment.query.id, msg=msg)
if bedN == 12:
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[1000, 1567, 4512, 5000],
[ 0, 567, 567, 1055]]),
# fmt: on
),
msg=msg,
)
else:
self.assertTrue(
np.array_equal(
alignment.coordinates,
np.array([[1000, 5000], [0, 4000]]),
),
msg=msg,
)
if bedN >= 7:
self.assertEqual(alignment.thickStart, 1200, msg=msg)
if bedN >= 8:
self.assertEqual(alignment.thickEnd, 4900, msg=msg)
if bedN >= 9:
self.assertEqual(alignment.itemRgb, "255,0,0", msg=msg)
alignment = next(alignments)
if bedN >= 5:
self.assertEqual(alignment.score, 900, msg=msg)
self.assertEqual(alignment.shape, (2, 4000), msg=msg)
self.assertLess(
alignment.coordinates[0, 0], alignment.coordinates[0, -1], msg=msg
)
if bedN >= 6:
self.assertGreater(
alignment.coordinates[1, 0],
alignment.coordinates[1, -1],
msg=msg,
)
else:
self.assertLess(
alignment.coordinates[1, 0],
alignment.coordinates[1, -1],
msg=msg,
)
self.assertEqual(len(alignment), 2, msg=msg)
self.assertIs(alignment.sequences[0], alignment.target, msg=msg)
self.assertIs(alignment.sequences[1], alignment.query, msg=msg)
self.assertEqual(alignment.target.id, "chr22", msg=msg)
if bedN >= 4:
self.assertEqual(alignment.query.id, "mRNA2", msg=msg)
else:
self.assertIsNone(alignment.query.id, msg=msg)
if bedN == 12:
self.assertTrue(
np.array_equal(
alignment.coordinates,
# fmt: off
np.array([[2000, 2433, 5601, 6000],
[ 832, 399, 399, 0]])
# fmt: on
),
msg=msg,
)
elif bedN >= 6:
self.assertTrue(
np.array_equal(
alignment.coordinates,
np.array([[2000, 6000], [4000, 0]]),
),
msg=msg,
)
else:
self.assertTrue(
np.array_equal(
alignment.coordinates,
np.array([[2000, 6000], [0, 4000]]),
),
msg=msg,
)
if bedN >= 7:
self.assertEqual(alignment.thickStart, 2300, msg=msg)
if bedN >= 8:
self.assertEqual(alignment.thickEnd, 5960, msg=msg)
if bedN >= 9:
self.assertEqual(alignment.itemRgb, "0,255,0", msg=msg)
with self.assertRaises(StopIteration) as cm:
next(alignments)
self.fail("More than two alignments reported")
def test_reading(self):
"""Test parsing alignments in file formats BED3 through BED12."""
for bedN in (3, 4, 5, 6, 7, 8, 9, 12):
filename = "bed%d.bb" % bedN
path = os.path.join("Blat", filename)
alignments = Align.parse(path, "bigbed")
msg = "bed%d" % bedN
self.check_autosql(alignments.declaration, bedN, msg)
self.check_alignments(alignments, bedN, msg)
def test_writing(self):
"""Test Writing alignments in file formats BED3 through BED12."""
for bedN in (3, 4, 5, 6, 7, 8, 9, 12):
filename = "bed%d.bb" % bedN
path = os.path.join("Blat", filename)
alignments = Align.parse(path, "bigbed")
with tempfile.TemporaryFile() as output:
Align.write(alignments, output, "bigbed", bedN=bedN)
output.flush()
output.seek(0)
alignments = Align.parse(output, "bigbed")
msg = "bed%d" % bedN
self.check_autosql(alignments.declaration, bedN, msg)
self.check_alignments(alignments, bedN, msg)
class TestAlign_extended_bed(unittest.TestCase):
# The bigBed files bigbed_extended.littleendian.bb and
# bigbed_extended.bigendian.bb are BED9+2 files, with nine predefined BED
# fields and two extra (custom) fields. Each was created by running
#
# bedToBigBed -as=bedExample2.as -type=bed9+2 -extraIndex=name,geneSymbol bedExample2.bed hg18.chrom.sizes bigbed_extended.bb
#
# where bedExample2.bed contains 10 lines selected from the example BED9+2
# file bedExample2.bed downloaded from UCSC
# (https://genome.ucsc.edu/goldenPath/help/examples/bedExample2.bed)
# and bedExample2.as the associated AutoSQL file, also downloaded from UCSC
# (https://genome.ucsc.edu/goldenPath/help/examples/bedExample2.as)
# declaring the nine predefined BED fields and the two extra fields.
#
# The bigBed file bigbed_extended.littleendian.bb was generated on a
# little-endian machine; the bigBed file bigbed_extended.bigendian.bb was
# generated on a big-endian machine.
def test_reading(self):
    """Test parsing bigbed_extended.bb.

    Both the little-endian and the big-endian variant of the file are
    parsed; each must report the matching byte order and pass
    check_alignments.
    """
    variants = (
        ("Blat/bigbed_extended.littleendian.bb", "<"),
        ("Blat/bigbed_extended.bigendian.bb", ">"),
    )
    for path, byteorder in variants:
        alignments = Align.parse(path, "bigbed")
        self.assertEqual(alignments.byteorder, byteorder)
        self.check_alignments(alignments)
def check_alignments(self, alignments):
self.assertEqual(
str(alignments.declaration),
"""\
table hg18KGchr7
"UCSC Genes for chr7 with color plus GeneSymbol and SwissProtID"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position of feature on chromosome"
uint chromEnd; "End position of feature on chromosome"
string name; "Name of gene"
uint score; "Score"
char[1] strand; "+ or - for strand"
uint thickStart; "Coding region start"
uint thickEnd; "Coding region end"
uint reserved; "Green on + strand, Red on - strand"
string geneSymbol; "Gene Symbol"
string spID; "SWISS-PROT protein Accession number"
)
""",
)
self.assertEqual(len(alignments.targets), 1)
self.assertEqual(alignments.targets[0].id, "chr7")
self.assertEqual(len(alignments.targets[0]), 158821424)
self.assertEqual(len(alignments), 10)
alignment = next(alignments)
self.assertEqual(alignment.score, 0)
self.assertEqual(alignment.thickStart, 60328)
self.assertEqual(alignment.thickEnd, 60328)
self.assertEqual(alignment.itemRgb, "255,0,0")
self.assertEqual(alignment.shape, (2, 1241))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr7")
self.assertEqual(alignment.query.id, "uc010krx.1")
self.assertTrue(
np.array_equal(alignment.coordinates, np.array([[60328, 61569], [1241, 0]]))
)
self.assertEqual(alignment.annotations["geneSymbol"], ".")
self.assertEqual(alignment.annotations["spID"], "PDGFA")
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (1241 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 1241:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 1241)
alignment = next(alignments)
self.assertEqual(alignment.score, 0)
self.assertEqual(alignment.thickStart, 506606)
self.assertEqual(alignment.thickEnd, 525164)
self.assertEqual(alignment.itemRgb, "255,0,0")
self.assertEqual(alignment.shape, (2, 22585))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr7")
self.assertEqual(alignment.query.id, "uc003sir.1")
self.assertTrue(
np.array_equal(
alignment.coordinates, np.array([[503422, 526007], [22585, 0]])
)
)
self.assertEqual(alignment.annotations["geneSymbol"], "PDGFA")
self.assertEqual(alignment.annotations["spID"], "P04085")
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (22585 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 22585:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 22585)
alignment = next(alignments)
self.assertEqual(alignment.score, 0)
self.assertEqual(alignment.thickStart, 504726)
self.assertEqual(alignment.thickEnd, 525164)
self.assertEqual(alignment.itemRgb, "255,0,0")
self.assertEqual(alignment.shape, (2, 22585))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr7")
self.assertEqual(alignment.query.id, "uc003sis.1")
self.assertTrue(
np.array_equal(
alignment.coordinates, np.array([[503422, 526007], [22585, 0]])
)
)
self.assertEqual(alignment.annotations["geneSymbol"], "PDGFA")
self.assertEqual(alignment.annotations["spID"], "P04085-2")
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (22585 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 22585:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 22585)
alignment = next(alignments)
self.assertEqual(alignment.score, 0)
self.assertEqual(alignment.thickStart, 507195)
self.assertEqual(alignment.thickEnd, 518820)
self.assertEqual(alignment.itemRgb, "255,0,0")
self.assertEqual(alignment.shape, (2, 12690))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr7")
self.assertEqual(alignment.query.id, "uc003sit.1")
self.assertTrue(
np.array_equal(
alignment.coordinates, np.array([[506940, 519630], [12690, 0]])
)
)
self.assertEqual(alignment.annotations["geneSymbol"], "PDGFA")
self.assertEqual(alignment.annotations["spID"], "Q32M96")
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (12690 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 12690:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 12690)
alignment = next(alignments)
self.assertEqual(alignment.score, 0)
self.assertEqual(alignment.thickStart, 556592)
self.assertEqual(alignment.thickEnd, 717668)
self.assertEqual(alignment.itemRgb, "255,0,0")
self.assertEqual(alignment.shape, (2, 162747))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr7")
self.assertEqual(alignment.query.id, "uc003siu.1")
self.assertTrue(
np.array_equal(
alignment.coordinates, np.array([[555912, 718659], [162747, 0]])
)
)
self.assertEqual(alignment.annotations["geneSymbol"], "PRKAR1B")
self.assertEqual(alignment.annotations["spID"], "Q8N422")
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (162747 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 162747:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 162747)
alignment = next(alignments)
self.assertEqual(alignment.score, 0)
self.assertEqual(alignment.thickStart, 556592)
self.assertEqual(alignment.thickEnd, 717668)
self.assertEqual(alignment.itemRgb, "255,0,0")
self.assertEqual(alignment.shape, (2, 163357))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr7")
self.assertEqual(alignment.query.id, "uc003siv.1")
self.assertTrue(
np.array_equal(
alignment.coordinates, np.array([[555912, 719269], [163357, 0]])
)
)
self.assertEqual(alignment.annotations["geneSymbol"], "PRKAR1B")
self.assertEqual(alignment.annotations["spID"], "Q8N422")
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (163357 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 163357:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 163357)
alignment = next(alignments)
self.assertEqual(alignment.score, 0)
self.assertEqual(alignment.thickStart, 556592)
self.assertEqual(alignment.thickEnd, 717668)
self.assertEqual(alignment.itemRgb, "255,0,0")
self.assertEqual(alignment.shape, (2, 177901))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr7")
self.assertEqual(alignment.query.id, "uc003siw.1")
self.assertTrue(
np.array_equal(
alignment.coordinates, np.array([[555912, 733813], [177901, 0]])
)
)
self.assertEqual(alignment.annotations["geneSymbol"], "PRKAR1B")
self.assertEqual(alignment.annotations["spID"], "Q8N422")
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (177901 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 177901:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 177901)
alignment = next(alignments)
self.assertEqual(alignment.score, 0)
self.assertEqual(alignment.thickStart, 585418)
self.assertEqual(alignment.thickEnd, 585418)
self.assertEqual(alignment.itemRgb, "255,0,0")
self.assertEqual(alignment.shape, (2, 22329))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertGreater(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr7")
self.assertEqual(alignment.query.id, "uc003six.1")
self.assertTrue(
np.array_equal(
alignment.coordinates, np.array([[585418, 607747], [22329, 0]])
)
)
self.assertEqual(alignment.annotations["geneSymbol"], ".")
self.assertEqual(alignment.annotations["spID"], "PRKAR1B")
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (22329 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 22329:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 22329)
alignment = next(alignments)
self.assertEqual(alignment.score, 0)
self.assertEqual(alignment.thickStart, 733217)
self.assertEqual(alignment.thickEnd, 791816)
self.assertEqual(alignment.itemRgb, "0,255,0")
self.assertEqual(alignment.shape, (2, 59779))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr7")
self.assertEqual(alignment.query.id, "uc003siz.2")
self.assertTrue(
np.array_equal(
alignment.coordinates, np.array([[732863, 792642], [0, 59779]])
)
)
self.assertEqual(alignment.annotations["geneSymbol"], ".")
self.assertEqual(alignment.annotations["spID"], "DKFZp762F1415")
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (59779 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 59779:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 59779)
alignment = next(alignments)
self.assertEqual(alignment.score, 0)
self.assertEqual(alignment.thickStart, 732883)
self.assertEqual(alignment.thickEnd, 791816)
self.assertEqual(alignment.itemRgb, "0,255,0")
self.assertEqual(alignment.shape, (2, 59779))
self.assertLess(alignment.coordinates[0, 0], alignment.coordinates[0, -1])
self.assertLess(alignment.coordinates[1, 0], alignment.coordinates[1, -1])
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr7")
self.assertEqual(alignment.query.id, "uc010krz.1")
self.assertTrue(
np.array_equal(
alignment.coordinates, np.array([[732863, 792642], [0, 59779]])
)
)
self.assertEqual(alignment.annotations["geneSymbol"], "HEATR2")
self.assertEqual(alignment.annotations["spID"], "Q86Y56")
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (59779 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 59779:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 59779)
def test_writing(self):
"""Test writing bigbed_extended.bb."""
byteorder = sys.byteorder # "little" or "big"
path = f"Blat/bigbed_extended.{byteorder}endian.bb"
with open(path, "rb") as stream:
correct = stream.read()
alignments = Align.parse(path, "bigbed")
with open("Blat/bedExample2.as") as stream:
autosql_data = stream.read()
declaration = bigbed.AutoSQLTable.from_string(autosql_data)
with tempfile.TemporaryFile() as output:
Align.write(
alignments,
output,
"bigbed",
bedN=9,
declaration=declaration,
extraIndex=["name", "geneSymbol"],
)
output.flush()
output.seek(0)
data = output.read()
self.assertEqual(correct, data)
alignments = Align.parse(path, "bigbed")
targets = alignments.targets
with tempfile.TemporaryFile() as output:
Align.write(
alignments,
output,
"bigbed",
bedN=9,
declaration=declaration,
targets=targets,
extraIndex=["name", "geneSymbol"],
)
output.flush()
output.seek(0)
data = output.read()
self.assertEqual(correct, data)
class TestAlign_searching(unittest.TestCase):
path = "Blat/bigbedtest.bb"
# The bigBed file bigbedtest.bb contains the following data:
# chr1 10 100 name1 1 +
# chr1 29 39 name2 2 -
# chr1 200 300 name3 3 +
# chr2 50 50 name4 6 +
# chr2 100 110 name5 4 +
# chr2 200 210 name6 5 +
# chr2 220 220 name7 6 +
# chr3 0 0 name8 7 -
def check_alignments(self, alignments):
self.assertEqual(
str(alignments.declaration),
"""\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
)
""",
)
self.assertEqual(len(alignments.targets), 3)
self.assertEqual(alignments.targets[0].id, "chr1")
self.assertEqual(len(alignments.targets[0]), 1000)
self.assertEqual(alignments.targets[1].id, "chr2")
self.assertEqual(len(alignments.targets[1]), 2000)
self.assertEqual(alignments.targets[2].id, "chr3")
self.assertEqual(len(alignments.targets[2]), 1000)
self.assertEqual(len(alignments), 8)
alignment = next(alignments)
self.assertEqual(alignment.score, 1)
self.assertEqual(alignment.shape, (2, 90))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "name1")
self.assertTrue(
np.array_equal(alignment.coordinates, np.array([[10, 100], [0, 90]]))
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (90 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 90:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 90)
alignment = next(alignments)
self.assertEqual(alignment.score, 2)
self.assertEqual(alignment.shape, (2, 10))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "name2")
self.assertTrue(
np.array_equal(alignment.coordinates, np.array([[29, 39], [10, 0]]))
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (10 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 10:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 10)
alignment = next(alignments)
self.assertEqual(alignment.score, 3)
self.assertEqual(alignment.shape, (2, 100))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr1")
self.assertEqual(alignment.query.id, "name3")
self.assertTrue(
np.array_equal(alignment.coordinates, np.array([[200, 300], [0, 100]]))
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (100 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 100:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 100)
alignment = next(alignments)
self.assertEqual(alignment.score, 6)
self.assertEqual(alignment.shape, (2, 0))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "name4")
self.assertTrue(
np.array_equal(alignment.coordinates, np.array([[50, 50], [0, 0]]))
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (0 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 0:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 0)
self.assertEqual(counts.identities, 0)
self.assertEqual(counts.mismatches, 0)
alignment = next(alignments)
self.assertEqual(alignment.score, 4)
self.assertEqual(alignment.shape, (2, 10))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "name5")
self.assertTrue(
np.array_equal(alignment.coordinates, np.array([[100, 110], [0, 10]]))
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (10 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 10:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 10)
alignment = next(alignments)
self.assertEqual(alignment.score, 5)
self.assertEqual(alignment.shape, (2, 10))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "name6")
self.assertTrue(
np.array_equal(alignment.coordinates, np.array([[200, 210], [0, 10]]))
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (10 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 10:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 10)
alignment = next(alignments)
self.assertEqual(alignment.score, 6)
self.assertEqual(alignment.shape, (2, 0))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr2")
self.assertEqual(alignment.query.id, "name7")
self.assertTrue(
np.array_equal(alignment.coordinates, np.array([[220, 220], [0, 0]]))
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (0 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 0:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 0)
self.assertEqual(counts.identities, 0)
self.assertEqual(counts.mismatches, 0)
alignment = next(alignments)
self.assertEqual(alignment.score, 7)
self.assertEqual(alignment.shape, (2, 0))
self.assertEqual(len(alignment), 2)
self.assertIs(alignment.sequences[0], alignment.target)
self.assertIs(alignment.sequences[1], alignment.query)
self.assertEqual(alignment.target.id, "chr3")
self.assertEqual(alignment.query.id, "name8")
self.assertTrue(
np.array_equal(alignment.coordinates, np.array([[0, 0], [0, 0]]))
)
counts = alignment.counts()
self.assertEqual(
repr(counts),
"<AlignmentCounts object (0 aligned letters; 0 identities; 0 mismatches; 0 gaps) at 0x%x>"
% id(counts),
)
self.assertEqual(
str(counts),
"""\
AlignmentCounts object with
aligned = 0:
identities = 0,
mismatches = 0.
gaps = 0:
left_gaps = 0:
left_insertions = 0:
open_left_insertions = 0,
extend_left_insertions = 0;
left_deletions = 0:
open_left_deletions = 0,
extend_left_deletions = 0;
internal_gaps = 0:
internal_insertions = 0:
open_internal_insertions = 0,
extend_internal_insertions = 0;
internal_deletions = 0:
open_internal_deletions = 0,
extend_internal_deletions = 0;
right_gaps = 0:
right_insertions = 0:
open_right_insertions = 0,
extend_right_insertions = 0;
right_deletions = 0:
open_right_deletions = 0,
extend_right_deletions = 0.
""",
)
self.assertEqual(counts.left_insertions, 0)
self.assertEqual(counts.left_deletions, 0)
self.assertEqual(counts.right_insertions, 0)
self.assertEqual(counts.right_deletions, 0)
self.assertEqual(counts.internal_insertions, 0)
self.assertEqual(counts.internal_deletions, 0)
self.assertEqual(counts.left_gaps, 0)
self.assertEqual(counts.right_gaps, 0)
self.assertEqual(counts.internal_gaps, 0)
self.assertEqual(counts.insertions, 0)
self.assertEqual(counts.deletions, 0)
self.assertEqual(counts.gaps, 0)
self.assertEqual(counts.aligned, 0)
self.assertEqual(counts.identities, 0)
self.assertEqual(counts.mismatches, 0)
def test_reading(self):
    """Parse bigbedtest.bb and check the alignments it yields."""
    self.check_alignments(Align.parse(self.path, "bigbed"))
def test_writing(self):
    """Write bigbedtest.bb to a temporary file and check what is read back."""
    parsed = Align.parse(self.path, "bigbed")
    with tempfile.TemporaryFile() as stream:
        Align.write(parsed, stream, "bigbed", bedN=6)
        # Rewind so that the freshly written data can be parsed again.
        stream.flush()
        stream.seek(0)
        # The temporary file must still be open while the alignments are read.
        self.check_alignments(Align.parse(stream, "bigbed"))
def test_search_chromosome(self):
    """Check the declaration of bigbedtest.bb and search it for chr2."""
    alignments = Align.parse(self.path, "bigbed")
    expected_declaration = """\
table bed
"Browser Extensible Data"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item."
uint score; "Score (0-1000)"
char[1] strand; "+ or - for strand"
)
"""
    self.assertEqual(str(alignments.declaration), expected_declaration)
    # Searching by chromosome name only selects every alignment on it.
    hits = alignments.search("chr2")
    found = [hit.query.id for hit in hits]
    self.assertEqual(found, ["name4", "name5", "name6", "name7"])
def test_search_region(self):
    """Search chr2 of bigbedtest.bb using several start and end positions."""
    alignments = Align.parse(self.path, "bigbed")
    # (start, end, names expected from the search), run in this order.
    regions = (
        (105, 1000, ["name5", "name6", "name7"]),
        (110, 1000, ["name6", "name7"]),
        (40, 50, ["name4"]),
        (50, 50, ["name4"]),
        (50, 200, ["name4", "name5"]),
        (200, 220, ["name6", "name7"]),
        (220, 220, ["name7"]),
    )
    for start, end, expected in regions:
        hits = alignments.search("chr2", start, end)
        found = [hit.query.id for hit in hits]
        self.assertEqual(found, expected)
def test_search_position(self):
    """Search chr1 of bigbedtest.bb giving a single position."""
    alignments = Align.parse(self.path, "bigbed")
    hits = alignments.search("chr1", 250)
    self.assertEqual([hit.query.id for hit in hits], ["name3"])
def test_three_iterators(self):
    """Advance three iterators over the same file in interleaved order."""
    everything = Align.parse(self.path, "bigbed")
    on_chr2 = everything.search("chr2")
    in_region = everything.search("chr2", 110, 1000)
    exhausted = None  # marks a step at which StopIteration is expected
    # Each step advances one iterator once; the order of the steps matters.
    steps = (
        (everything, "name1"),
        (everything, "name2"),
        (on_chr2, "name4"),
        (on_chr2, "name5"),
        (on_chr2, "name6"),
        (in_region, "name6"),
        (in_region, "name7"),
        (everything, "name3"),
        (everything, "name4"),
        (everything, "name5"),
        (on_chr2, "name7"),
        (on_chr2, exhausted),
        (everything, "name6"),
        (everything, "name7"),
        (in_region, exhausted),
        (everything, "name8"),
        (everything, exhausted),
    )
    for iterator, expected in steps:
        if expected is exhausted:
            self.assertRaises(StopIteration, next, iterator)
        else:
            self.assertEqual(next(iterator).query.id, expected)
class BinaryTestBaseClass(unittest.TestCase):
    """Test case base class that can compare two binary streams."""

    def assertBinaryEqual(self, file1, file2):
        """Fail unless file1 and file2 yield the same bytes until both end.

        Arguments:
         - file1, file2 - binary file objects; both are read from their
           current position in blocks of 1024 bytes.

        The test fails if one stream returns fewer bytes than the other,
        reporting the number of bytes read from each stream so far, or if
        a byte differs, reporting its position counted from where reading
        started.
        """
        blocksize = 1024
        n = 0  # number of bytes found to be identical so far
        while True:
            data1 = file1.read(blocksize)
            data2 = file2.read(blocksize)
            if data1 == b"" and data2 == b"":
                return
            n1 = len(data1)
            if data1 == data2:
                n += n1
                continue
            n2 = len(data2)
            # Add the n bytes of the preceding blocks, so that the message
            # shows sizes counted from the start instead of block lengths.
            if n1 < n2:
                self.fail(
                    f"unequal file sizes: {n + n1} bytes vs >= {n + n2} bytes"
                )
            if n1 > n2:
                self.fail(
                    f"unequal file sizes: >= {n + n1} bytes vs {n + n2} bytes"
                )
            # Blocks of equal length with different contents: find the byte.
            for i, (c1, c2) in enumerate(zip(data1, data2)):
                if c1 != c2:
                    self.fail(f"bytes at position {n + i} differ: {c1} vs {c2}")
@unittest.skipUnless(big is True, "big file; use --big to run")
class TestAlign_big(BinaryTestBaseClass):
    """Compare bigBed files written by Align.write to reference bigBed files."""

    # BED files were downloaded from the UCSC table browser:
    #
    # ucsc.bed contains the GENCODE V43 Basic gene annotations for human genome
    # assembly hg38.
    #
    # anoGam3.bed contains the AUGUSTUS gene annotations for genome assembly
    # anoGam3 of Anopheles gambiae (African malaria mosquito).
    #
    # ailMel1.bed contains the NCBI RefSeq All gene annotations for genome
    # assembly ailMel1 of Ailuropoda melanoleuca (giant panda).
    #
    # bisBis1.bed contains the AUGUSTUS gene annotations for genome assembly
    # bisBis1 of Bison bison bison (American bison).
    #
    # bigBed files were generated using bedToBigBed v. 2.9 found in
    # jksrc.v445.zip of the 'kent' source tree provided by UCSC
    # (http://hgdownload.cse.ucsc.edu/admin/).

    def _check_written_file(
        self, declaration_path, source_path, expected_path, **keywords
    ):
        """Write the alignments in source_path and compare to expected_path.

        Arguments:
         - declaration_path - path to the AutoSql (.as) file from which the
           declaration passed to Align.write is created.
         - source_path      - path to the bigBed file that is parsed.
         - expected_path    - path to the bigBed file to which the written
           output is compared byte by byte.
         - keywords         - additional keyword arguments passed unchanged
           to Align.write (bedN, compress, extraIndex).
        """
        with open(declaration_path) as stream:
            data = stream.read()
        declaration = bigbed.AutoSQLTable.from_string(data)
        alignments = Align.parse(source_path, "bigbed")
        with tempfile.TemporaryFile() as output, open(expected_path, "rb") as stream:
            Align.write(
                alignments,
                output,
                "bigbed",
                declaration=declaration,
                **keywords,
            )
            # Rewind the written file before comparing it to the reference.
            output.flush()
            output.seek(0)
            self.assertBinaryEqual(output, stream)

    def test_a_compressed(self):
        # grep -E -v 'fix|alt' Blat/ucsc.bed > ucsc.clean.bed
        # sort -k1,1 -k2,2n ucsc.clean.bed -o ucsc.clean.bed
        # bedToBigBed -as=Blat/bed12.as ucsc.clean.bed Align/hg38.chrom.sizes ucsc.bb
        self._check_written_file("Blat/bed12.as", "ucsc.bb", "ucsc.bb", compress=True)

    def test_b_uncompressed(self):
        # grep -E -v 'fix|alt' Blat/ucsc.bed > ucsc.clean.bed
        # sort -k1,1 -k2,2n ucsc.clean.bed -o ucsc.clean.bed
        # bedToBigBed -as=Blat/bed12.as -unc ucsc.clean.bed Align/hg38.chrom.sizes ucsc.unc.bb
        self._check_written_file(
            "Blat/bed12.as", "ucsc.bb", "ucsc.unc.bb", compress=False
        )

    def test_c_bed3(self):
        # grep -E -v 'fix|alt' Blat/ucsc.bed > ucsc.clean.bed
        # sort -k1,1 -k2,2n ucsc.clean.bed -o ucsc.clean.bed
        # cut -f 1-3 ucsc.clean.bed > ucsc.bed3.bed
        # bedToBigBed -as=Blat/bed3.as -type=bed3 ucsc.bed3.bed Align/hg38.chrom.sizes ucsc.bed3.bb
        self._check_written_file(
            "Blat/bed3.as", "ucsc.bb", "ucsc.bed3.bb", bedN=3, compress=True
        )

    def test_d_bed4(self):
        # grep -E -v 'fix|alt' Blat/ucsc.bed > ucsc.clean.bed
        # sort -k1,1 -k2,2n ucsc.clean.bed -o ucsc.clean.bed
        # cut -f 1-4 ucsc.clean.bed > ucsc.bed4.bed
        # bedToBigBed -as=Blat/bed4.as -type=bed4 ucsc.bed4.bed Align/hg38.chrom.sizes ucsc.bed4.bb
        self._check_written_file(
            "Blat/bed4.as", "ucsc.bb", "ucsc.bed4.bb", bedN=4, compress=True
        )

    def test_e_bed5(self):
        # grep -E -v 'fix|alt' Blat/ucsc.bed > ucsc.clean.bed
        # sort -k1,1 -k2,2n ucsc.clean.bed -o ucsc.clean.bed
        # cut -f 1-5 ucsc.clean.bed > ucsc.bed5.bed
        # bedToBigBed -as=Blat/bed5.as -type=bed5 ucsc.bed5.bed Align/hg38.chrom.sizes ucsc.bed5.bb
        self._check_written_file(
            "Blat/bed5.as", "ucsc.bb", "ucsc.bed5.bb", bedN=5, compress=True
        )

    def test_f_bed6(self):
        # grep -E -v 'fix|alt' Blat/ucsc.bed > ucsc.clean.bed
        # sort -k1,1 -k2,2n ucsc.clean.bed -o ucsc.clean.bed
        # cut -f 1-6 ucsc.clean.bed > ucsc.bed6.bed
        # bedToBigBed -as=Blat/bed6.as -type=bed6 ucsc.bed6.bed Align/hg38.chrom.sizes ucsc.bed6.bb
        self._check_written_file(
            "Blat/bed6.as", "ucsc.bb", "ucsc.bed6.bb", bedN=6, compress=True
        )

    def test_g_bed7(self):
        # grep -E -v 'fix|alt' Blat/ucsc.bed > ucsc.clean.bed
        # sort -k1,1 -k2,2n ucsc.clean.bed -o ucsc.clean.bed
        # cut -f 1-7 ucsc.clean.bed > ucsc.bed7.bed
        # bedToBigBed -as=Blat/bed7.as -type=bed7 ucsc.bed7.bed Align/hg38.chrom.sizes ucsc.bed7.bb
        self._check_written_file(
            "Blat/bed7.as", "ucsc.bb", "ucsc.bed7.bb", bedN=7, compress=True
        )

    def test_h_bed8(self):
        # grep -E -v 'fix|alt' Blat/ucsc.bed > ucsc.clean.bed
        # sort -k1,1 -k2,2n ucsc.clean.bed -o ucsc.clean.bed
        # cut -f 1-8 ucsc.clean.bed > ucsc.bed8.bed
        # bedToBigBed -as=Blat/bed8.as -type=bed8 ucsc.bed8.bed Align/hg38.chrom.sizes ucsc.bed8.bb
        self._check_written_file(
            "Blat/bed8.as", "ucsc.bb", "ucsc.bed8.bb", bedN=8, compress=True
        )

    def test_i_bed9(self):
        # grep -E -v 'fix|alt' Blat/ucsc.bed > ucsc.clean.bed
        # sort -k1,1 -k2,2n ucsc.clean.bed -o ucsc.clean.bed
        # cut -f 1-9 ucsc.clean.bed > ucsc.bed9.bed
        # bedToBigBed -as=Blat/bed9.as -type=bed9 ucsc.bed9.bed Align/hg38.chrom.sizes ucsc.bed9.bb
        self._check_written_file(
            "Blat/bed9.as", "ucsc.bb", "ucsc.bed9.bb", bedN=9, compress=True
        )

    def test_j_extraindex(self):
        # If the names in the BED file are not unique, the binary contents of
        # the generated bigBed file will depend on the exact implementation of
        # the quicksort algorithm in the Standard Library of C (used by
        # bedToBigBed) and the quicksort algorithm in numpy (used by Biopython).
        # To prevent spurious errors when comparing bigBed files created by
        # bedToBigBed and by Biopython, we first remove lines with duplicated
        # names from the BED file.
        # grep -E -v 'fix|alt' Blat/ucsc.bed > ucsc.clean.bed
        # sort -u -k4,4 ucsc.clean.bed | sort -k1,1 -k2,2n > ucsc.unique.bed
        # bedToBigBed -as=Blat/bed12.as -extraIndex=name ucsc.unique.bed Align/hg38.chrom.sizes ucsc.indexed.bb
        self._check_written_file(
            "Blat/bed12.as",
            "ucsc.indexed.bb",
            "ucsc.indexed.bb",
            extraIndex=["name"],
        )

    def test_k_anogam(self):
        # sort -k1,1 -k2,2n Blat/anoGam3.bed -o anoGam3.bed
        # bedToBigBed -as=Blat/bed12.as anoGam3.bed Blat/anoGam3.chrom.sizes anoGam3.bb
        self._check_written_file(
            "Blat/bed12.as", "anoGam3.bb", "anoGam3.bb", compress=True
        )

    def test_l_ailmel(self):
        # sort -k1,1 -k2,2n Blat/ailMel1.bed -o ailMel1.bed
        # bedToBigBed -as=Blat/bed12.as ailMel1.bed Blat/ailMel1.chrom.sizes ailMel1.bb
        self._check_written_file(
            "Blat/bed12.as", "ailMel1.bb", "ailMel1.bb", compress=True
        )

    def test_m_bisbis(self):
        # sort -k1,1 -k2,2n bisBis1.bed -o bisBis1.bed
        # bedToBigBed -as=Blat/bed12.as bisBis1.bed Blat/bisBis1.chrom.sizes bisBis1.bb
        self._check_written_file(
            "Blat/bed12.as", "bisBis1.bb", "bisBis1.bb", compress=True
        )
class TestDeclarations(unittest.TestCase):
    """Test parsing the AutoSql declaration files stored in Blat/."""

    def test_declarations(self):
        """Check name, comment, and fields of each bedN.as declaration.

        The declaration with N fields is expected to contain the first N
        entries of the field table below.  Every file is checked; leaving
        the test once the fields of bed3.as were verified would skip all
        files with more fields.
        """
        # Expected (type, name, comment) of the fields, in declaration order.
        expected_fields = (
            ("string", "chrom", "Reference sequence chromosome or scaffold"),
            ("uint", "chromStart", "Start position in chromosome"),
            ("uint", "chromEnd", "End position in chromosome"),
            ("string", "name", "Name of item."),
            ("uint", "score", "Score (0-1000)"),
            ("char[1]", "strand", "+ or - for strand"),
            (
                "uint",
                "thickStart",
                "Start of where display should be thick (start codon)",
            ),
            (
                "uint",
                "thickEnd",
                "End of where display should be thick (stop codon)",
            ),
            ("uint", "reserved", "Used as itemRgb as of 2004-11-22"),
            ("int", "blockCount", "Number of blocks"),
            ("int[blockCount]", "blockSizes", "Comma separated list of block sizes"),
            (
                "int[blockCount]",
                "chromStarts",
                "Start positions relative to chromStart",
            ),
        )
        for length in (3, 4, 5, 6, 7, 8, 9, 12):
            filename = "bed%d.as" % length
            path = os.path.join("Blat", filename)
            with open(path) as stream:
                data = stream.read()
            declaration = bigbed.AutoSQLTable.from_string(data)
            self.assertEqual(declaration.name, "bed", msg=filename)
            self.assertEqual(
                declaration.comment, "Browser Extensible Data", msg=filename
            )
            self.assertEqual(len(declaration), length, msg=filename)
            for index, (as_type, name, comment) in enumerate(
                expected_fields[:length]
            ):
                field = declaration[index]
                self.assertEqual(field.as_type, as_type, msg=filename)
                self.assertEqual(field.name, name, msg=filename)
                self.assertEqual(field.comment, comment, msg=filename)
if __name__ == "__main__":
    # Run the tests in this file with verbose output when executed directly.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))