diff --git a/Bio/Align/stockholm.py b/Bio/Align/stockholm.py index 45f0af351..f05453692 100644 --- a/Bio/Align/stockholm.py +++ b/Bio/Align/stockholm.py @@ -234,7 +234,10 @@ class AlignmentIterator(interfaces.AlignmentIterator): else: assert len(value) == 1, (key, value) value = value.pop() - alignment.annotations[AlignmentIterator.gf_mapping[key]] = value + try: + alignment.annotations[AlignmentIterator.gf_mapping[key]] = value + except KeyError: + pass @staticmethod def _store_per_column_annotations(alignment, gc, columns, skipped_columns): @@ -251,7 +254,9 @@ class AlignmentIterator(interfaces.AlignmentIterator): raise ValueError( f"{key} length is {len(value)}, expected {columns}" ) - alignment.column_annotations[AlignmentIterator.gc_mapping[key]] = value + alignment.column_annotations[ + AlignmentIterator.gc_mapping.get(key, key) + ] = value @staticmethod def _store_per_sequence_annotations(alignment, gs): @@ -267,7 +272,9 @@ class AlignmentIterator(interfaces.AlignmentIterator): elif key == "DR": record.dbxrefs = value else: - record.annotations[AlignmentIterator.gs_mapping[key]] = value + record.annotations[ + AlignmentIterator.gs_mapping.get(key, key) + ] = value @staticmethod def _store_per_sequence_and_per_column_annotations(alignment, gr): @@ -277,9 +284,9 @@ class AlignmentIterator(interfaces.AlignmentIterator): break else: raise ValueError(f"Failed to find seqname {seqname}") - for keyword, letter_annotation in letter_annotations.items(): - feature = AlignmentIterator.gr_mapping[keyword] - if keyword == "CSA": + for key, letter_annotation in letter_annotations.items(): + feature = AlignmentIterator.gr_mapping.get(key, key) + if key == "CSA": letter_annotation = letter_annotation.replace("-", "") else: letter_annotation = letter_annotation.replace(".", "") @@ -533,7 +540,7 @@ class AlignmentWriter(interfaces.AlignmentWriter): for record in alignment.sequences: name = record.id.ljust(width) for key, value in record.annotations.items(): - feature = 
self.gs_mapping[key] + feature = self.gs_mapping.get(key, key) lines.append(f"#=GS {name} {feature} {value}\n") if record.description: lines.append(f"#=GS {name} DE {record.description}\n") @@ -558,8 +565,8 @@ class AlignmentWriter(interfaces.AlignmentWriter): # alignment.column_annotations if alignment.column_annotations: for key, value in alignment.column_annotations.items(): - feature = self.gc_mapping[key] - line = f"#=GC {feature}".ljust(start) + value + "\n" + feature = self.gc_mapping.get(key, key) + line = f"#=GC {feature} ".ljust(start) + value + "\n" lines.append(line) lines.append("//\n") return "".join(lines) @@ -592,7 +599,7 @@ class AlignmentWriter(interfaces.AlignmentWriter): indices.reverse() name = record.id.ljust(width) for key, value in record.letter_annotations.items(): - feature = AlignmentWriter.gr_mapping[key] + feature = AlignmentWriter.gr_mapping.get(key, key) j = 0 values = bytearray(b"." * len(aligned_sequence)) for i, letter in enumerate(aligned_sequence): @@ -600,7 +607,7 @@ class AlignmentWriter(interfaces.AlignmentWriter): values[i] = ord(value[j]) j += 1 value = values.decode() - line = f"#=GR {name} {feature}".ljust(start) + value + "\n" + line = f"#=GR {name} {feature} ".ljust(start) + value + "\n" yield line diff --git a/CONTRIB.rst b/CONTRIB.rst index 81fd3392c..5a0addfd0 100644 --- a/CONTRIB.rst +++ b/CONTRIB.rst @@ -320,6 +320,7 @@ please open an issue on GitHub or mention it on the mailing list. 
- Tiago Antao - Tianyi Shi - Tim Burke +- Tom Eulenfeld - Tommy Carstensen - Tyghe Vallard - Uri Laserson diff --git a/NEWS.rst b/NEWS.rst index 212216889..367f3b0ab 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -120,6 +120,7 @@ possible, especially the following contributors: - Ricardas Ralys (first contribution) - Rob Miller - Thomas Holder +- Tom Eulenfeld (first contribution) - Vladislav Kuznetsov (first contribution) - Wibowo Arindrarto - Yiming Qu (first contribution) diff --git a/Tests/Stockholm/example_nonstandardannotations.sth b/Tests/Stockholm/example_nonstandardannotations.sth new file mode 100644 index 000000000..c45d676bd --- /dev/null +++ b/Tests/Stockholm/example_nonstandardannotations.sth @@ -0,0 +1,39 @@ +# STOCKHOLM 1.0 +#=GF ID HAT +#=GF AC PF02184.18 +#=GF DE HAT (Half-A-TPR) repeat +#=GF AU SMART; +#=GF SE Alignment kindly provided by SMART +#=GF GA 21.00 21.00; +#=GF TC 21.00 21.00; +#=GF NC 20.90 20.90; +#=GF BM hmmbuild HMM.ann SEED.ann +#=GF SM hmmsearch -Z 57096847 -E 1000 --cpu 4 HMM pfamseq +#=GF TP Repeat +#=GF CL CL0020 +#=GF RN [1] +#=GF RM 9478129 +#=GF RT The HAT helix, a repetitive motif implicated in RNA processing. +#=GF RA Preker PJ, Keller W; +#=GF RL Trends Biochem Sci 1998;23:15-16. +#=GF DR INTERPRO; IPR003107; +#=GF DR SMART; HAT; +#=GF DR SO; 0001068; polypeptide_repeat; +#=GF CC The HAT (Half A TPR) repeat is found in several RNA processing +#=GF CC proteins [1]. 
+#=GF SQ 3 +#=GF nonstandardgf Nondefault GF lines are ignored in io +#=GS CRN_DROME/191-222 AC P17886.2 +#=GS CRN_DROME/191-222 nonstandardgs 42 +#=GS CLF1_SCHPO/185-216 AC P87312.1 +#=GS CLF1_SCHPO/185-216 DR PDB; 3JB9 R; 185-216; +#=GS O16376_CAEEL/201-233 AC O16376.2 +CRN_DROME/191-222 KEIDRAREIYERFVYVH.PDVKNWIKFARFEES +#=GR CRN_DROME/191-222 nonstandardgr --------X.XXXXXXXX--------------- +CLF1_SCHPO/185-216 HENERARGIYERFVVVH.PEVTNWLRWARFEEE +#=GR CLF1_SCHPO/185-216 SS --HHHHHHHHHHHHHHS.--HHHHHHHHHHHHH +O16376_CAEEL/201-233 KEIDRARSVYQRFLHVHGINVQNWIKYAKFEER +#=GC SS_cons --HHHHHHHHHHHHHHS.--HHHHHHHHHHHHH +#=GC seq_cons KEIDRARuIYERFVaVH.P-VpNWIKaARFEEc +#=GC nonstandardgc --------..........--------------- +// diff --git a/Tests/test_Align_stockholm.py b/Tests/test_Align_stockholm.py index 5985b7811..dfd4a01a0 100644 --- a/Tests/test_Align_stockholm.py +++ b/Tests/test_Align_stockholm.py @@ -6,6 +6,7 @@ import unittest from io import StringIO + from Bio import Align @@ -6688,6 +6689,25 @@ np.array([['V', 'E', 'R', 'Y', 'S', 'L', 'S', 'P', 'M', 'K', 'D', 'L', 'W', stream.close() self.check_alignment_cath3(alignment) + def test_io_nonstandard_annotations(self): + """Test input and output of nonstandard GC, GS and GR annotation lines.""" + # We write the alignment once to a stream and read it again to test + # both input and output. 
+ path = "Stockholm/example_nonstandardannotations.sth" + alignments = Align.parse(path, "stockholm") + alignment = next(alignments) + self.assertNotIn("nonstandardgf", alignment.annotations.keys()) + stream = StringIO() + Align.write(alignment, stream, "stockholm") + stream.seek(0) + alignments = Align.parse(stream, "stockholm") + alignment = next(alignments) + stream.close() + self.assertIn("nonstandardgc", alignment.column_annotations.keys()) + self.assertIn("nonstandardgs", alignment.sequences[0].annotations.keys()) + self.assertIn("nonstandardgr", alignment.sequences[0].letter_annotations.keys()) + self.assertNotIn("nonstandardgf", alignment.annotations.keys()) + if __name__ == "__main__": runner = unittest.TextTestRunner(verbosity=2)