mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
241 lines
15 KiB
TeX
241 lines
15 KiB
TeX
% This is the main LaTeX file which is used to produce the Biopython
|
|
% Tutorial documentation.
|
|
%
|
|
% If you just want to read the documentation, you can pick up ready-to-go
|
|
% copies in both pdf and html format from:
|
|
%
|
|
% http://biopython.org/DIST/docs/tutorial/Tutorial.html
|
|
% http://biopython.org/DIST/docs/tutorial/Tutorial.pdf
|
|
%
|
|
% If you want to typeset the documentation, you'll need a standard TeX/LaTeX
|
|
% distribution (I use teTeX, which works great for me on Unix platforms).
|
|
% Additionally, you need HeVeA (or at least hevea.sty), which can be
|
|
% found at:
|
|
%
|
|
% http://pauillac.inria.fr/~maranget/hevea/index.html
|
|
%
|
|
% You will also need the pictures included in the document, some of
|
|
% which are UMLish diagrams created by Dia
|
|
% (http://www.lysator.liu.se/~alla/dia/dia.html).
|
|
% These diagrams are available from Biopython git in the original dia
|
|
% format, which you can easily save as .png format using Dia itself.
|
|
% They are also checked in as the png files, so if you make
|
|
% modifications to the original dia files, the png files should also be
|
|
% changed.
|
|
%
|
|
% Once you're all set, you should be able to generate pdf by running:
|
|
%
|
|
% pdflatex Tutorial.tex (to generate the first draft)
|
|
% pdflatex Tutorial.tex (to get the cross references right)
|
|
% pdflatex Tutorial.tex (to get the table of contents right)
|
|
%
|
|
% To generate the html, you'll need HeVeA installed. You should be
|
|
% able to just run:
|
|
%
|
|
% hevea -fix Tutorial.tex
|
|
%
|
|
% However, on older versions of hevea you may first need to remove the
|
|
% Tutorial.aux file generated by LaTeX, then run hevea twice to get
|
|
% the references right.
|
|
%
|
|
% If you want to typeset this and have problems, please get in touch via
|
|
% the mailing list or GitHub, and we'll try to get things resolved. We
|
|
% always love to have people interested in the documentation!
|
|
|
|
\documentclass{report}
|
|
\usepackage{url}
|
|
\usepackage{fullpage}
|
|
\usepackage{hevea}
|
|
\usepackage{graphicx}
|
|
|
|
% For syntax coloring of python, pycon, bash etc in pdflatex:
|
|
\usepackage{minted}
|
|
% Minted fails on hevea, https://github.com/gpoore/minted/issues/234
|
|
% silently fall back on verbatim - ignore the language argument:
|
|
%HEVEA \newenvironment{minted}[1]{\verbatim}{\endverbatim}
|
|
|
|
% make everything have section numbers
|
|
\setcounter{secnumdepth}{4}
|
|
|
|
% Make links between references
|
|
\usepackage{hyperref}
|
|
\newif\ifpdf
|
|
\ifx\pdfoutput\undefined
|
|
\pdffalse
|
|
\else
|
|
\pdfoutput=1
|
|
\pdftrue
|
|
\fi
|
|
\ifpdf
|
|
\hypersetup{colorlinks=true, hyperindex=true, citecolor=red, urlcolor=blue}
|
|
\fi
|
|
|
|
\begin{document}
|
|
|
|
\begin{htmlonly}
|
|
\title{Biopython Tutorial and Cookbook}
|
|
\end{htmlonly}
|
|
\begin{latexonly}
|
|
\title{
|
|
%Hack to get the logo on the PDF front page:
|
|
\includegraphics[width=\textwidth]{images/biopython_logo.pdf}\\
|
|
%Hack to get some white space using a blank line:
|
|
~\\
|
|
Biopython Tutorial and Cookbook}
|
|
\end{latexonly}
|
|
|
|
\author{Jeff Chang, Brad Chapman, Iddo Friedberg, Thomas Hamelryck, \\
|
|
Michiel de Hoon, Peter Cock, Tiago Antao, Eric Talevich, Bartek Wilczy\'{n}ski}
|
|
\date{Last Update -- \today\ (Biopython \bpversion)}
|
|
|
|
%Hack to get the logo at the start of the HTML front page:
|
|
%(hopefully this isn't going to be too wide for most people)
|
|
\begin{rawhtml}
|
|
<P ALIGN="center">
|
|
<IMG ALIGN="center" SRC="images/biopython_logo.svg" TITLE="Biopython Logo" ALT="[Biopython Logo]" width="450" height="300" />
|
|
</p>
|
|
\end{rawhtml}
|
|
|
|
\maketitle
|
|
\tableofcontents
|
|
|
|
\include{Tutorial/chapter_introduction}
|
|
\include{Tutorial/chapter_quick_start}
|
|
\include{Tutorial/chapter_seq_objects}
|
|
\include{Tutorial/chapter_seq_annot}
|
|
\include{Tutorial/chapter_seqio}
|
|
\include{Tutorial/chapter_align}
|
|
\include{Tutorial/chapter_pairwise}
|
|
\include{Tutorial/chapter_msa}
|
|
\include{Tutorial/chapter_pairwise2}
|
|
\include{Tutorial/chapter_blast}
|
|
\include{Tutorial/chapter_searchio}
|
|
\include{Tutorial/chapter_entrez}
|
|
\include{Tutorial/chapter_uniprot}
|
|
\include{Tutorial/chapter_pdb}
|
|
\include{Tutorial/chapter_popgen}
|
|
\include{Tutorial/chapter_phylo}
|
|
\include{Tutorial/chapter_motifs}
|
|
\include{Tutorial/chapter_cluster}
|
|
\include{Tutorial/chapter_graphics}
|
|
\include{Tutorial/chapter_kegg}
|
|
\include{Tutorial/chapter_phenotype}
|
|
\include{Tutorial/chapter_cookbook}
|
|
\include{Tutorial/chapter_testing}
|
|
\include{Tutorial/chapter_contributing}
|
|
\include{Tutorial/chapter_appendix}
|
|
|
|
\begin{thebibliography}{99}
|
|
\bibitem{cock2009}
|
|
Peter J. A. Cock, Tiago Antao, Jeffrey T. Chang, Brad A. Chapman, Cymon J. Cox, Andrew Dalke, Iddo Friedberg, Thomas Hamelryck, Frank Kauff, Bartek Wilczynski, Michiel J. L. de Hoon: ``Biopython: freely available Python tools for computational molecular biology and bioinformatics''. {\it Bioinformatics} {\bf 25} (11), 1422--1423 (2009).
|
|
\url{https://doi.org/10.1093/bioinformatics/btp163}
|
|
\bibitem{pritchard2006}
|
|
Leighton Pritchard, Jennifer A. White, Paul R.J. Birch, Ian K. Toth: ``GenomeDiagram: a python package for the visualization of large-scale genomic data''. {\it Bioinformatics} {\bf 22} (5): 616--617 (2006).
|
|
\url{https://doi.org/10.1093/bioinformatics/btk021}
|
|
\bibitem{toth2006}
|
|
Ian K. Toth, Leighton Pritchard, Paul R. J. Birch: ``Comparative genomics reveals what makes an enterobacterial plant pathogen''. {\it Annual Review of Phytopathology} {\bf 44}: 305--336 (2006).
|
|
\url{https://doi.org/10.1146/annurev.phyto.44.070505.143444}
|
|
\bibitem{vanderauwera2009}
|
|
G\'eraldine A. van der Auwera, Jaroslaw E. Kr\'ol, Haruo Suzuki, Brian Foster, Rob van Houdt, Celeste J. Brown, Max Mergeay, Eva M. Top: ``Plasmids captured in C. metallidurans CH34: defining the PromA family of broad-host-range plasmids''.
|
|
\textit{Antonie van Leeuwenhoek} {\bf 96} (2): 193--204 (2009).
|
|
\url{https://doi.org/10.1007/s10482-009-9316-9}
|
|
\bibitem{proux2002}
|
|
Caroline Proux, Douwe van Sinderen, Juan Suarez, Pilar Garcia, Victor Ladero, Gerald F. Fitzgerald, Frank Desiere, Harald Br\"ussow:
|
|
``The dilemma of phage taxonomy illustrated by comparative genomics of Sfi21-Like Siphoviridae in lactic acid bacteria''. \textit{Journal of Bacteriology} {\bf 184} (21): 6026--6036 (2002).
|
|
\url{https://doi.org/10.1128/JB.184.21.6026-6036.2002}
|
|
\bibitem{jupe2012}
|
|
Florian Jupe, Leighton Pritchard, Graham J. Etherington, Katrin MacKenzie, Peter JA Cock, Frank Wright, Sanjeev Kumar Sharma, Dan Bolser, Glenn J Bryan, Jonathan DG Jones, Ingo Hein: ``Identification and localisation of the NB-LRR gene family within the potato genome''. \textit{BMC Genomics} {\bf 13}: 75 (2012).
|
|
\url{https://doi.org/10.1186/1471-2164-13-75}
|
|
\bibitem{cock2010}
|
|
Peter J. A. Cock, Christopher J. Fields, Naohisa Goto, Michael L. Heuer, Peter M. Rice: ``The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants''. \textit{Nucleic Acids Research} {\bf 38} (6): 1767--1771 (2010). \url{https://doi.org/10.1093/nar/gkp1137}
|
|
\bibitem{talevich2012}
|
|
Eric Talevich, Brandon M. Invergo, Peter J.A. Cock, Brad A. Chapman: ``Bio.Phylo: A unified toolkit for processing, analyzing and visualizing phylogenetic trees in Biopython''. \textit{BMC Bioinformatics} {\bf 13}: 209 (2012).
|
|
\url{https://doi.org/10.1186/1471-2105-13-209}
|
|
\bibitem{cornish1985}
|
|
Athel Cornish-Bowden: ``Nomenclature for incompletely specified bases in nucleic acid sequences: Recommendations 1984.'' \textit{Nucleic Acids Research} {\bf 13} (9): 3021--3030 (1985).
|
|
\url{https://doi.org/10.1093/nar/13.9.3021}
|
|
\bibitem{cavener1987}
|
|
Douglas R. Cavener: ``Comparison of the consensus sequence flanking translational start sites in Drosophila and vertebrates.'' \textit{Nucleic Acids Research} {\bf 15} (4): 1353--1361 (1987).
|
|
\url{https://doi.org/10.1093/nar/15.4.1353}
|
|
\bibitem{bailey1994}
|
|
Timothy L. Bailey and Charles Elkan: ``Fitting a mixture model by expectation maximization to discover motifs in biopolymers'', \textit{Proceedings of the Second International Conference on Intelligent Systems for Molecular Biology} 28--36. AAAI Press, Menlo Park, California (1994).
|
|
\bibitem{chapman2000}
|
|
Brad Chapman and Jeff Chang: ``Biopython: Python tools for computational biology''. \textit{ACM SIGBIO Newsletter} {\bf 20} (2): 15--19 (August 2000).
|
|
\bibitem{darling2004}
|
|
Aaron E. Darling, Bob Mau, Frederick R. Blattner, Nicole T. Perna: ``Mauve: Multiple alignment of conserved genomic sequence with rearrangements.'' \textit{Genome Research} {\bf 14} (7): 1394--1403 (2004).
|
|
\url{https://doi.org/10.1101/gr.2289704}
|
|
\bibitem{dayhoff1978}
|
|
M.O. Dayhoff, R.M. Schwartz, and B.C. Orcutt: ``A Model of Evolutionary Change in Proteins.'' \textit{Atlas of Protein Sequence and Structure}, Volume 5, Supplement 3, 1978: 345--352. The National Biomedical Research Foundation, 1979.
|
|
\bibitem{dehoon2004}
|
|
Michiel J. L. de Hoon, Seiya Imoto, John Nolan, Satoru Miyano: ``Open source clustering software''. \textit{Bioinformatics} {\bf 20} (9): 1453--1454 (2004).
|
|
\url{https://doi.org/10.1093/bioinformatics/bth078}
|
|
\bibitem{durbin1998}
|
|
Richard Durbin, Sean R. Eddy, Anders Krogh, Graeme Mitchison:
|
|
``Biological sequence analysis: Probabilistic models of proteins and nucleic acids''.
|
|
Cambridge University Press, Cambridge, UK (1998).
|
|
\bibitem{eisen1998}
|
|
Michael B. Eisen, Paul T. Spellman, Patrick O. Brown, David Botstein: ``Cluster analysis and display of genome-wide expression patterns''. \textit{Proceedings of the National Academy of Sciences USA} {\bf 95} (25): 14863--14868 (1998). \url{https://doi.org/10.1073/pnas.95.25.14863}
|
|
\bibitem{golub1971}
|
|
Gene H. Golub, Christian Reinsch: ``Singular value decomposition and least squares solutions''. In \textit{Handbook for Automatic Computation}, {\bf 2}, (Linear Algebra) (J. H. Wilkinson and C. Reinsch, eds), 134--151. New York: Springer-Verlag (1971).
|
|
\bibitem{golub1989}
|
|
Gene H. Golub, Charles F. Van Loan: \textit{Matrix computations}, 2nd edition (1989).
|
|
\bibitem{hamelryck2003a}
|
|
Thomas Hamelryck and Bernard Manderick: ``PDB parser and structure class
|
|
implemented in Python''. \textit{Bioinformatics}, \textbf{19} (17): 2308--2310 (2003) \url{https://doi.org/10.1093/bioinformatics/btg299}.
|
|
\bibitem{hamelryck2003b}
|
|
Thomas Hamelryck: ``Efficient identification of side-chain patterns using a multidimensional index tree''. \textit{Proteins} {\bf 51} (1): 96--108 (2003).
|
|
\url{https://doi.org/10.1002/prot.10338}
|
|
\bibitem{hamelryck2005}
|
|
Thomas Hamelryck: ``An amino acid has two sides: A new 2D measure provides a different view of solvent exposure''. \textit{Proteins} {\bf 59} (1): 38--48 (2005).
|
|
\url{https://doi.org/10.1002/prot.20379}.
|
|
\bibitem{henikoff1992}
|
|
Steven Henikoff, Jorja G. Henikoff: ``Amino acid substitution matrices from protein blocks.'' \textit{Proceedings of the National Academy of Sciences USA} {\bf 89} (22): 10915--10919 (1992). \url{https://doi.org/10.1073/pnas.89.22.10915}.
|
|
\bibitem{hihara2001}
|
|
Yukako Hihara, Ayako Kamei, Minoru Kanehisa, Aaron Kaplan and Masahiko Ikeuchi: ``DNA microarray analysis of cyanobacterial gene expression during acclimation to high light''. \textit{Plant Cell} {\bf 13} (4): 793--806 (2001). \url{https://doi.org/10.1105/tpc.13.4.793}.
|
|
\bibitem{altschul1990}
|
|
Stephen F. Altschul, Warren Gish, Webb Miller, Eugene W. Myers, David J. Lipman: ``Basic Local Alignment Search Tool''. \textit{Journal of Molecular Biology} {\bf 215} (3): 403--410 (1990). \url{https://doi.org/10.1016/S0022-2836%2805%2980360-2}.
|
|
\bibitem{kachitvichyanukul1988}
|
|
Voratas Kachitvichyanukul, Bruce W. Schmeiser: ``Binomial Random Variate Generation''. \textit{Communications of the ACM} {\bf 31} (2): 216--222 (1988). \url{https://doi.org/10.1145/42372.42381}
|
|
\bibitem{kent2002}
|
|
W. James Kent: ``BLAT --- The BLAST-Like Alignment Tool''. \textit{Genome Research} {\bf 12}: 656--664 (2002). \url{https://doi.org/10.1101/gr.229202}
|
|
\bibitem{kohonen1997}
|
|
Teuvo Kohonen: ``Self-organizing maps'', 2nd Edition. Berlin; New York: Springer-Verlag (1997).
|
|
\bibitem{lecuyer1988}
|
|
Pierre L'Ecuyer: ``Efficient and Portable Combined Random Number Generators.''
|
|
\textit{Communications of the ACM} {\bf 31} (6): 742--749,774 (1988).
|
|
\url{https://doi.org/10.1145/62959.62969}
|
|
\bibitem{li2009}
|
|
Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin: ``The Sequence Alignment/Map format and SAMtools.'' \textit{Bioinformatics} {\bf 25} (16): 2078--2079 (2009).
|
|
\url{https://doi.org/10.1093/bioinformatics/btp352}
|
|
\bibitem{majumdar2005}
|
|
Indraneel Majumdar, S. Sri Krishna, Nick V. Grishin: ``PALSSE: A program to delineate linear secondary structural elements from protein structures.'' \textit{BMC Bioinformatics}, {\bf 6}: 202 (2005).
|
|
\url{https://doi.org/10.1186/1471-2105-6-202}.
|
|
\bibitem{matys2003}
|
|
V. Matys, E. Fricke, R. Geffers, E. G\"ossling, M. Haubrock, R. Hehl, K. Hornischer, D. Karas, A.E. Kel, O.V. Kel-Margoulis, D.U. Kloos, S. Land, B. Lewicki-Potapov, H. Michael, R. M\"unch, I. Reuter, S. Rotert, H. Saxel, M. Scheer, S. Thiele, E. Wingender: ``TRANSFAC: transcriptional regulation, from patterns to profiles.'' \textit{Nucleic Acids Research} {\bf 31} (1): 374--378 (2003).
|
|
\url{https://doi.org/10.1093/nar/gkg108}
|
|
\bibitem{saldanha2004}
|
|
Alok Saldanha: ``Java Treeview---extensible visualization of microarray data''. \textit{Bioinformatics} {\bf 20} (17): 3246--3248 (2004).
|
|
\url{https://doi.org/10.1093/bioinformatics/bth349}
|
|
\bibitem{schneider1986}
|
|
Thomas D. Schneider, Gary D. Stormo, Larry Gold: ``Information content of binding sites on nucleotide sequences''. \textit{Journal of Molecular Biology} {\bf 188} (3): 415--431 (1986). \url{https://doi.org/10.1016/0022-2836(86)90165-8}
|
|
\bibitem{schneider2005}
|
|
Adrian Schneider, Gina M. Cannarozzi, and Gaston H. Gonnet: ``Empirical codon substitution matrix''. \textit{BMC Bioinformatics} {\bf 6}: 134 (2005).
|
|
\url{https://doi.org/10.1186/1471-2105-6-134}
|
|
\bibitem{sibson1973}
|
|
Robin Sibson: ``SLINK: An optimally efficient algorithm for the single-link cluster method''. \textit{The Computer Journal} {\bf 16} (1): 30--34 (1973).
|
|
\url{https://doi.org/10.1093/comjnl/16.1.30}
|
|
\bibitem{snedecor1989}
|
|
George W. Snedecor, William G. Cochran: \textit{Statistical methods}. Ames, Iowa: Iowa State University Press (1989).
|
|
\bibitem{tamayo1999}
|
|
Pablo Tamayo, Donna Slonim, Jill Mesirov, Qing Zhu, Sutisak Kitareewan, Ethan Dmitrovsky, Eric S. Lander, Todd R. Golub: ``Interpreting patterns of gene expression with self-organizing maps: Methods and application to hematopoietic differentiation''. \textit{Proceedings of the National Academy of Sciences USA} {\bf 96} (6): 2907--2912 (1999). \url{https://doi.org/10.1073/pnas.96.6.2907}
|
|
\bibitem{tukey1977}
|
|
John W. Tukey: ``Exploratory data analysis''. Reading, Mass.: Addison-Wesley Pub. Co. (1977).
|
|
\bibitem{waterman1987}
|
|
Michael S. Waterman, Mark Eggert: ``A new algorithm for best subsequence alignments with application to tRNA-rRNA comparisons'', \textit{Journal of Molecular Biology} {\bf 197} (4): 723--728 (1987). \url{https://doi.org/10.1016/0022-2836(87)90478-5}
|
|
\bibitem{yeung2001}
|
|
Ka Yee Yeung, Walter L. Ruzzo: ``Principal Component Analysis for clustering gene expression data''. \textit{Bioinformatics} {\bf 17} (9): 763--774 (2001).
|
|
\url{https://doi.org/10.1093/bioinformatics/17.9.763}
|
|
\end{thebibliography}
|
|
\end{document}
|
|
|