mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
allow to read and write nonstandard GC, GS, GR lines in Stockholm format (#4504)
* read and write nonstandard GC, GS, GR lines * no warning for ignored non-standard GF lines * add contributor info * add test file for nonstandard annotations in stockholm format
This commit is contained in:
@ -234,7 +234,10 @@ class AlignmentIterator(interfaces.AlignmentIterator):
|
|||||||
else:
|
else:
|
||||||
assert len(value) == 1, (key, value)
|
assert len(value) == 1, (key, value)
|
||||||
value = value.pop()
|
value = value.pop()
|
||||||
alignment.annotations[AlignmentIterator.gf_mapping[key]] = value
|
try:
|
||||||
|
alignment.annotations[AlignmentIterator.gf_mapping[key]] = value
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _store_per_column_annotations(alignment, gc, columns, skipped_columns):
|
def _store_per_column_annotations(alignment, gc, columns, skipped_columns):
|
||||||
@ -251,7 +254,9 @@ class AlignmentIterator(interfaces.AlignmentIterator):
|
|||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"{key} length is {len(value)}, expected {columns}"
|
f"{key} length is {len(value)}, expected {columns}"
|
||||||
)
|
)
|
||||||
alignment.column_annotations[AlignmentIterator.gc_mapping[key]] = value
|
alignment.column_annotations[
|
||||||
|
AlignmentIterator.gc_mapping.get(key, key)
|
||||||
|
] = value
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _store_per_sequence_annotations(alignment, gs):
|
def _store_per_sequence_annotations(alignment, gs):
|
||||||
@ -267,7 +272,9 @@ class AlignmentIterator(interfaces.AlignmentIterator):
|
|||||||
elif key == "DR":
|
elif key == "DR":
|
||||||
record.dbxrefs = value
|
record.dbxrefs = value
|
||||||
else:
|
else:
|
||||||
record.annotations[AlignmentIterator.gs_mapping[key]] = value
|
record.annotations[
|
||||||
|
AlignmentIterator.gs_mapping.get(key, key)
|
||||||
|
] = value
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _store_per_sequence_and_per_column_annotations(alignment, gr):
|
def _store_per_sequence_and_per_column_annotations(alignment, gr):
|
||||||
@ -277,9 +284,9 @@ class AlignmentIterator(interfaces.AlignmentIterator):
|
|||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Failed to find seqname {seqname}")
|
raise ValueError(f"Failed to find seqname {seqname}")
|
||||||
for keyword, letter_annotation in letter_annotations.items():
|
for key, letter_annotation in letter_annotations.items():
|
||||||
feature = AlignmentIterator.gr_mapping[keyword]
|
feature = AlignmentIterator.gr_mapping.get(key, key)
|
||||||
if keyword == "CSA":
|
if key == "CSA":
|
||||||
letter_annotation = letter_annotation.replace("-", "")
|
letter_annotation = letter_annotation.replace("-", "")
|
||||||
else:
|
else:
|
||||||
letter_annotation = letter_annotation.replace(".", "")
|
letter_annotation = letter_annotation.replace(".", "")
|
||||||
@ -533,7 +540,7 @@ class AlignmentWriter(interfaces.AlignmentWriter):
|
|||||||
for record in alignment.sequences:
|
for record in alignment.sequences:
|
||||||
name = record.id.ljust(width)
|
name = record.id.ljust(width)
|
||||||
for key, value in record.annotations.items():
|
for key, value in record.annotations.items():
|
||||||
feature = self.gs_mapping[key]
|
feature = self.gs_mapping.get(key, key)
|
||||||
lines.append(f"#=GS {name} {feature} {value}\n")
|
lines.append(f"#=GS {name} {feature} {value}\n")
|
||||||
if record.description:
|
if record.description:
|
||||||
lines.append(f"#=GS {name} DE {record.description}\n")
|
lines.append(f"#=GS {name} DE {record.description}\n")
|
||||||
@ -558,8 +565,8 @@ class AlignmentWriter(interfaces.AlignmentWriter):
|
|||||||
# alignment.column_annotations
|
# alignment.column_annotations
|
||||||
if alignment.column_annotations:
|
if alignment.column_annotations:
|
||||||
for key, value in alignment.column_annotations.items():
|
for key, value in alignment.column_annotations.items():
|
||||||
feature = self.gc_mapping[key]
|
feature = self.gc_mapping.get(key, key)
|
||||||
line = f"#=GC {feature}".ljust(start) + value + "\n"
|
line = f"#=GC {feature} ".ljust(start) + value + "\n"
|
||||||
lines.append(line)
|
lines.append(line)
|
||||||
lines.append("//\n")
|
lines.append("//\n")
|
||||||
return "".join(lines)
|
return "".join(lines)
|
||||||
@ -592,7 +599,7 @@ class AlignmentWriter(interfaces.AlignmentWriter):
|
|||||||
indices.reverse()
|
indices.reverse()
|
||||||
name = record.id.ljust(width)
|
name = record.id.ljust(width)
|
||||||
for key, value in record.letter_annotations.items():
|
for key, value in record.letter_annotations.items():
|
||||||
feature = AlignmentWriter.gr_mapping[key]
|
feature = AlignmentWriter.gr_mapping.get(key, key)
|
||||||
j = 0
|
j = 0
|
||||||
values = bytearray(b"." * len(aligned_sequence))
|
values = bytearray(b"." * len(aligned_sequence))
|
||||||
for i, letter in enumerate(aligned_sequence):
|
for i, letter in enumerate(aligned_sequence):
|
||||||
@ -600,7 +607,7 @@ class AlignmentWriter(interfaces.AlignmentWriter):
|
|||||||
values[i] = ord(value[j])
|
values[i] = ord(value[j])
|
||||||
j += 1
|
j += 1
|
||||||
value = values.decode()
|
value = values.decode()
|
||||||
line = f"#=GR {name} {feature}".ljust(start) + value + "\n"
|
line = f"#=GR {name} {feature} ".ljust(start) + value + "\n"
|
||||||
yield line
|
yield line
|
||||||
|
|
||||||
|
|
||||||
|
@ -320,6 +320,7 @@ please open an issue on GitHub or mention it on the mailing list.
|
|||||||
- Tiago Antao <https://github.com/tiagoantao>
|
- Tiago Antao <https://github.com/tiagoantao>
|
||||||
- Tianyi Shi <https://github.com/TianyiShi2001>
|
- Tianyi Shi <https://github.com/TianyiShi2001>
|
||||||
- Tim Burke <https://github.com/tipabu>
|
- Tim Burke <https://github.com/tipabu>
|
||||||
|
- Tom Eulenfeld <https://github.com/trichter>
|
||||||
- Tommy Carstensen <https://github.com/tommycarstensen>
|
- Tommy Carstensen <https://github.com/tommycarstensen>
|
||||||
- Tyghe Vallard <https://github.com/necrolyte2>
|
- Tyghe Vallard <https://github.com/necrolyte2>
|
||||||
- Uri Laserson <https://github.com/laserson>
|
- Uri Laserson <https://github.com/laserson>
|
||||||
|
1
NEWS.rst
1
NEWS.rst
@ -120,6 +120,7 @@ possible, especially the following contributors:
|
|||||||
- Ricardas Ralys (first contribution)
|
- Ricardas Ralys (first contribution)
|
||||||
- Rob Miller
|
- Rob Miller
|
||||||
- Thomas Holder
|
- Thomas Holder
|
||||||
|
- Tom Eulenfeld (first contribution)
|
||||||
- Vladislav Kuznetsov (first contribution)
|
- Vladislav Kuznetsov (first contribution)
|
||||||
- Wibowo Arindrarto
|
- Wibowo Arindrarto
|
||||||
- Yiming Qu (first contribution)
|
- Yiming Qu (first contribution)
|
||||||
|
39
Tests/Stockholm/example_nonstandardannotations.sth
Normal file
39
Tests/Stockholm/example_nonstandardannotations.sth
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# STOCKHOLM 1.0
|
||||||
|
#=GF ID HAT
|
||||||
|
#=GF AC PF02184.18
|
||||||
|
#=GF DE HAT (Half-A-TPR) repeat
|
||||||
|
#=GF AU SMART;
|
||||||
|
#=GF SE Alignment kindly provided by SMART
|
||||||
|
#=GF GA 21.00 21.00;
|
||||||
|
#=GF TC 21.00 21.00;
|
||||||
|
#=GF NC 20.90 20.90;
|
||||||
|
#=GF BM hmmbuild HMM.ann SEED.ann
|
||||||
|
#=GF SM hmmsearch -Z 57096847 -E 1000 --cpu 4 HMM pfamseq
|
||||||
|
#=GF TP Repeat
|
||||||
|
#=GF CL CL0020
|
||||||
|
#=GF RN [1]
|
||||||
|
#=GF RM 9478129
|
||||||
|
#=GF RT The HAT helix, a repetitive motif implicated in RNA processing.
|
||||||
|
#=GF RA Preker PJ, Keller W;
|
||||||
|
#=GF RL Trends Biochem Sci 1998;23:15-16.
|
||||||
|
#=GF DR INTERPRO; IPR003107;
|
||||||
|
#=GF DR SMART; HAT;
|
||||||
|
#=GF DR SO; 0001068; polypeptide_repeat;
|
||||||
|
#=GF CC The HAT (Half A TPR) repeat is found in several RNA processing
|
||||||
|
#=GF CC proteins [1].
|
||||||
|
#=GF SQ 3
|
||||||
|
#=GF nondefaultgf Nondefault GF lines are ignored in io
|
||||||
|
#=GS CRN_DROME/191-222 AC P17886.2
|
||||||
|
#=GS CRN_DROME/191-222 nonstandardgs 42
|
||||||
|
#=GS CLF1_SCHPO/185-216 AC P87312.1
|
||||||
|
#=GS CLF1_SCHPO/185-216 DR PDB; 3JB9 R; 185-216;
|
||||||
|
#=GS O16376_CAEEL/201-233 AC O16376.2
|
||||||
|
CRN_DROME/191-222 KEIDRAREIYERFVYVH.PDVKNWIKFARFEES
|
||||||
|
#=GR CRN_DROME/191-222 nonstandardgr --------X.XXXXXXXX---------------
|
||||||
|
CLF1_SCHPO/185-216 HENERARGIYERFVVVH.PEVTNWLRWARFEEE
|
||||||
|
#=GR CLF1_SCHPO/185-216 SS --HHHHHHHHHHHHHHS.--HHHHHHHHHHHHH
|
||||||
|
O16376_CAEEL/201-233 KEIDRARSVYQRFLHVHGINVQNWIKYAKFEER
|
||||||
|
#=GC SS_cons --HHHHHHHHHHHHHHS.--HHHHHHHHHHHHH
|
||||||
|
#=GC seq_cons KEIDRARuIYERFVaVH.P-VpNWIKaARFEEc
|
||||||
|
#=GC nonstandardgc --------..........---------------
|
||||||
|
//
|
@ -6,6 +6,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
|
|
||||||
from Bio import Align
|
from Bio import Align
|
||||||
|
|
||||||
|
|
||||||
@ -6688,6 +6689,25 @@ np.array([['V', 'E', 'R', 'Y', 'S', 'L', 'S', 'P', 'M', 'K', 'D', 'L', 'W',
|
|||||||
stream.close()
|
stream.close()
|
||||||
self.check_alignment_cath3(alignment)
|
self.check_alignment_cath3(alignment)
|
||||||
|
|
||||||
|
def test_io_nonstandard_annotations(self):
|
||||||
|
"""Test input and output of nonstandard GC, GS and GR annotation lines."""
|
||||||
|
# We write the alignment once to a stream and read it again to test
|
||||||
|
# both inpiut and output.
|
||||||
|
path = "Stockholm/example_nonstandardannotations.sth"
|
||||||
|
alignments = Align.parse(path, "stockholm")
|
||||||
|
alignment = next(alignments)
|
||||||
|
self.assertNotIn("nonstandardgf", alignment.annotations.keys())
|
||||||
|
stream = StringIO()
|
||||||
|
Align.write(alignment, stream, "stockholm")
|
||||||
|
stream.seek(0)
|
||||||
|
alignments = Align.parse(stream, "stockholm")
|
||||||
|
alignment = next(alignments)
|
||||||
|
stream.close()
|
||||||
|
self.assertIn("nonstandardgc", alignment.column_annotations.keys())
|
||||||
|
self.assertIn("nonstandardgs", alignment.sequences[0].annotations.keys())
|
||||||
|
self.assertIn("nonstandardgr", alignment.sequences[0].letter_annotations.keys())
|
||||||
|
self.assertNotIn("nonstandardgf", alignment.annotations.keys())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
runner = unittest.TextTestRunner(verbosity=2)
|
runner = unittest.TextTestRunner(verbosity=2)
|
||||||
|
Reference in New Issue
Block a user