2 * This file is part of the Vamsas Client version 0.1.
\r
3 * Copyright 2009 by Jim Procter, Iain Milne, Pierre Marguerite,
\r
4 * Andrew Waterhouse and Dominik Lindner.
\r
6 * Earlier versions have also been incorporated into Jalview version 2.4
\r
7 * since 2008, and TOPALi version 2 since 2007.
\r
9 * The Vamsas Client is free software: you can redistribute it and/or modify
\r
10 * it under the terms of the GNU Lesser General Public License as published by
\r
11 * the Free Software Foundation, either version 3 of the License, or
\r
12 * (at your option) any later version.
\r
14 * The Vamsas Client is distributed in the hope that it will be useful,
\r
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
17 * GNU Lesser General Public License for more details.
\r
19 * You should have received a copy of the GNU Lesser General Public License
\r
20 * along with the Vamsas Client. If not, see <http://www.gnu.org/licenses/>.
\r
22 package uk.ac.vamsas.objects.utils;
\r
24 import java.io.BufferedWriter;
\r
25 import java.io.IOException;
\r
26 import java.io.OutputStream;
\r
27 import java.io.OutputStreamWriter;
\r
28 import java.util.regex.Pattern;
\r
30 import uk.ac.vamsas.objects.core.AlignmentSequence;
\r
31 import uk.ac.vamsas.objects.core.Sequence;
\r
32 import uk.ac.vamsas.objects.core.SequenceType;
\r
37 * TODO To change the template for this generated type comment go to
\r
38 * Window - Preferences - Java - Code Style - Code Templates
\r
42 public static void write_PirSeq(OutputStream os, SequenceType seq, int wid)
\r
43 throws IOException {
\r
44 BufferedWriter pir_out = new BufferedWriter(new OutputStreamWriter(os));
\r
45 pir_out.write(">P1;" + seq.getName() + "\n");
\r
46 int width = (wid < 1) ? 80 : wid;
\r
47 for (int j = 0, k = seq.getSequence().length(); j < k; j += width)
\r
49 pir_out.write(seq.getSequence().substring(j, j + width) + "\n");
\r
51 pir_out.write(seq.getSequence().substring(j) + "\n");
\r
55 public static void write_FastaSeq(OutputStream os, SequenceType seq)
\r
56 throws IOException {
\r
57 BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os));
\r
58 fasta_out.write(">" + seq.getName() + "\n");
\r
59 fasta_out.write(seq.getSequence() + "\n");
\r
63 public static void write_FastaSeq(OutputStream os, SequenceType seq, int wid)
\r
64 throws IOException {
\r
65 BufferedWriter fasta_out = new BufferedWriter(new OutputStreamWriter(os));
\r
66 fasta_out.write(">" + seq.getName() + "\n");
\r
67 int width = (wid < 1) ? 80 : wid;
\r
68 for (int j = 0, k = seq.getSequence().length(); j < k; j += width)
\r
70 fasta_out.write(seq.getSequence().substring(j, j + width) + "\n");
\r
72 fasta_out.write(seq.getSequence().substring(j) + "\n");
\r
77 *validate a SequenceType Vobject as an info:iubmb.org/aminoacid SequenceType
\r
78 * This version resolves references to Sequence objects from AlignmentSequence
\r
79 * TODO: Define info: urn for dictionary string (could also be regex of valid
\r
85 * @return true if a valid amino acid sequence Vobject
\r
87 private static boolean valid_aadictionary_string(String s, String dict) {
\r
90 // validate against dictionary
\r
91 // TODO generalise to resolve dictionary against info: urn for dictionary
\r
93 Pattern aa_repl = Pattern.compile("[ARNDCQEGHILKMFPSTWYVUX]+",
\r
94 Pattern.CASE_INSENSITIVE);
\r
95 String remnants = aa_repl.matcher(s).replaceAll("");
\r
96 return !remnants.matches("//S+");
\r
99 public static Sequence newSequence(String Name, String Sequence,
\r
100 String Dictionary, int start, int end) {
\r
101 // TODO: make hierarchy reflecting the SeqType Vobject.
\r
102 Sequence seq = new Sequence();
\r
103 seq.setDictionary(Dictionary);
\r
105 seq.setSequence(Sequence);
\r
106 seq.setStart(start);
\r
107 if (start <= end) {
\r
108 if ((end - start) != Sequence.length())
\r
109 seq.setEnd(start + Sequence.length());
\r
111 // reverse topology mapping. TODO: VAMSAS: decide if allowed to do
\r
112 // start>end on Sequence Vobject
\r
113 if ((start - end) != Sequence.length())
\r
114 seq.setEnd(end + Sequence.length());
\r
119 public static AlignmentSequence newAlignmentSequence(String name,
\r
120 String alSequence, Sequence refseq, long start, long end) {
\r
121 if (refseq != null) {
\r
122 AlignmentSequence asq = new AlignmentSequence();
\r
124 asq.setSequence(alSequence);
\r
125 asq.setRefid(refseq);
\r
126 if (end > refseq.getEnd() || end < start || end == -1)
\r
127 end = refseq.getEnd();
\r
129 if (start < refseq.getStart())
\r
130 start = refseq.getStart();
\r
131 asq.setStart(start);
\r
137 public static boolean is_valid_aa_seq(SequenceType s) {
\r
139 boolean validref = false;
\r
140 if (s instanceof Sequence) {
\r
142 if (q.getDictionary() != null
\r
144 && q.getDictionary().length() > 0
\r
145 || !q.getDictionary().equals(SymbolDictionary.STANDARD_AA))
\r
147 return valid_aadictionary_string(q.getSequence(),
\r
148 SymbolDictionary.STANDARD_AA);
\r
151 // follow references
\r
152 if (s instanceof AlignmentSequence) {
\r
153 Object w = (((AlignmentSequence) s).getRefid());
\r
154 if (w != null && w != s && w instanceof SequenceType)
\r
155 return is_valid_aa_seq((SequenceType) w)
\r
156 && valid_aadictionary_string(((AlignmentSequence) s).getSequence(),
\r
157 SymbolDictionary.STANDARD_AA);
\r