/* * Jalview - A Sequence Alignment Editor and Viewer * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ package jalview.analysis; import jalview.analysis.*; import jalview.datamodel.*; import jalview.io.*; import jalview.jbgui.*; import java.applet.Applet; import java.awt.*; import java.io.*; import java.net.*; import java.util.*; public class AAFrequency { // Takes in a vector of sequences and column start and column end // and returns a vector of size (end-start+1). Each element of the // vector contains a hashtable with the keys being residues and // the values being the count of each residue in that column. // This class is used extensively in calculating alignment colourschemes // that depend on the amount of conservation in each alignment column. public static Vector calculate(Vector sequences, int start, int end) { Vector result = new Vector(); for (int i = start; i <= end; i++) { Hashtable residueHash = new Hashtable(); int maxCount = 0; String maxResidue = "-"; int nongap = 0; for (int j = 0; j < sequences.size(); j++) { if (sequences.elementAt(j) instanceof Sequence) { Sequence s = (Sequence) sequences.elementAt(j); if (s.getSequence().length() > i) { String res = s.getSequence().charAt(i) + ""; if (!jalview.util.Comparison.isGap(res.charAt(0))) { nongap++; } else { res = "-"; // we always use this for gaps in the property vectors } if (residueHash.containsKey(res)) { int count = ((Integer) residueHash.get(res)).intValue(); count++; if (!jalview.util.Comparison.isGap(res.charAt(0)) && (count >= maxCount)) { if (count > maxCount) { maxResidue = res; } else if (maxResidue.indexOf(res) == -1) { maxResidue += res; } maxCount = count; } residueHash.put(res, new Integer(count)); } else { residueHash.put(res, new Integer(1)); } } else { if (residueHash.containsKey("-")) { int count = ((Integer) residueHash.get("-")).intValue(); count++; residueHash.put("-", new Integer(count)); } else { residueHash.put("-", new Integer(1)); } } } } residueHash.put("maxCount", new Integer(maxCount)); if (maxCount < 0) { System.out.println("asasa " + maxCount); } residueHash.put("maxResidue", maxResidue); residueHash.put("size", new Integer(sequences.size())); residueHash.put("nongap", new Integer(nongap)); result.addElement(residueHash); } return result; } public static Vector calculatePID(SequenceI refseq, Vector sequences, int window, int start, int end) { Vector result = new Vector(); boolean init = true; Vector prev = null; for (int i = start; i <= end; i++) { Vector values = new Vector(); result.addElement(values); // If start < window/2 then set value to zero. if ((i < (window / 2)) || (i >= (refseq.getSequence().length() - (window / 2)))) { for (int j = 0; j < sequences.size(); j++) { values.addElement(new Integer(0)); } } else if (init == true) { init = false; int winstart = i - (window / 2); int winend = i + (window / 2); if ((window % 2) != 0) { winend++; } for (int j = 0; j < sequences.size(); j++) { values.addElement(new Integer(0)); } for (int k = winstart; k <= winend; k++) { String refchar = refseq.getSequence().substring(k, k + 1); if (jalview.util.Comparison.isGap(refchar.charAt(0))) { refchar = "-"; } else { for (int j = 0; j < sequences.size(); j++) { Sequence s = (Sequence) sequences.elementAt(j); if (s.getSequence().length() > k) { String res = s.getSequence().substring(k, k + 1); // no gapchar test needed if (res.equals(refchar)) { int val = ((Integer) values.elementAt(j)).intValue(); val++; values.setElementAt(new Integer(val), j); } } } } } prev = values; } else { int winstart = i - (window / 2); int winend = i + (window / 2); if ((window % 2) != 0) { winend++; } // We need to take the previous set of values // subtract the pid at winstart-1 // and add the pid at winend; String pre_refchar = refseq.getSequence().substring(winstart - 1, winstart); String pos_refchar = "-"; if (refseq.getSequence().length() > winend) { pos_refchar = refseq.getSequence().substring(winend, winend + 1); } for (int j = 0; j < sequences.size(); j++) { // First copy the pid value from i-1 int val = ((Integer) prev.elementAt(j)).intValue(); Sequence s = (Sequence) sequences.elementAt(j); String pre_char = s.getSequence().substring(winstart - 1, winstart); String pos_char = "-"; if (s.getSequence().length() > winend) { pos_char = s.getSequence().substring(winend, winend + 1); } // Now substract 1 if the chars at winstart-1 match if ((jalview.util.Comparison.isGap(pre_refchar.charAt(0)) == false) && pre_char.equals(pre_refchar)) { val--; } if ((jalview.util.Comparison.isGap(pos_refchar.charAt(0)) == false) && pos_char.equals(pos_refchar)) { val++; } values.addElement(new Integer(val)); } prev = values; } } return result; } public static Hashtable findBlocks(Vector seqs, int start, int end, Vector exc) { // start and end are in real (not relative coords); // The coords in the hashtable that is returned are in relative coords // i.e. start from 0 Hashtable blocks = new Hashtable(); boolean prev = false; int bstart = -1; for (int i = start; i <= end; i++) { SequenceI seq = (SequenceI) seqs.elementAt(0); char c = seq.getCharAt(i); boolean found = true; int j = 1; while ((j < seqs.size()) && (found == true)) { SequenceI jseq = (SequenceI) seqs.elementAt(j); if (!exc.contains(jseq)) { char cc = jseq.getCharAt(i); if (cc != c) { found = false; } } j++; } if ((prev == false) && (found == true)) { bstart = i; } else if ((prev == true) && (found == false) && (bstart != -1)) { int blockstart = bstart - start; int blocklen = i - bstart; //System.out.println("Start len " + blockstart + " " + blocklen); for (int jj = blockstart; jj < (blockstart + blocklen); jj++) { blocks.put(new Integer(jj), new Integer(blocklen)); } bstart = -1; } prev = found; } if (bstart != -1) { int blockstart = bstart - start; int blocklen = end - bstart; // System.out.println("Start len " + blockstart + " " + blocklen); for (int jj = blockstart; jj < (blockstart + blocklen); jj++) { blocks.put(new Integer(blockstart), new Integer(blocklen)); } } return blocks; } public static Hashtable findKmerCount(SequenceI seq, int start, int end, int window, int step, Vector kmers) { int tmpstart = start; Hashtable vals = new Hashtable(); while (tmpstart <= end) { String tmpstr = seq.getSequence().substring(tmpstart - (window / 2), tmpstart + (window / 2)); int count = 0; //System.out.println("Str " + tmpstr); for (int ii = 0; ii < kmers.size(); ii++) { String kmer = ((SequenceI) kmers.elementAt(ii)).getSequence(); int i = -1; while (tmpstr.indexOf(kmer, i) != -1) { i = tmpstr.indexOf(kmer, i); i++; count++; } ii++; } vals.put(new Integer(tmpstart), new Integer(count)); tmpstart += step; } return vals; } public static Hashtable findBlockStarts(Vector seqs, int start, int end, Vector exc) { // start and end are in real (not relative coords); // The coords in the hashtable that is returned are in relative coords // i.e. start from 0 Hashtable blocks = new Hashtable(); boolean prev = false; int bstart = -1; for (int i = start; i <= end; i++) { SequenceI seq = (SequenceI) seqs.elementAt(0); char c = seq.getCharAt(i); boolean found = true; int j = 1; while ((j < seqs.size()) && (found == true)) { SequenceI jseq = (SequenceI) seqs.elementAt(j); if (!exc.contains(jseq)) { char cc = jseq.getCharAt(i); if (cc != c) { found = false; } } j++; } if ((prev == false) && (found == true)) { bstart = i; } else if ((prev == true) && (found == false) && (bstart != -1)) { int blockstart = bstart - start; int blocklen = i - bstart; // System.out.println("Start len " + blockstart + " " + blocklen); //for (int jj = blockstart; jj < blockstart + blocklen;jj++) { blocks.put(new Integer(blockstart), new Integer(blocklen)); // } bstart = -1; } prev = found; } if (bstart != -1) { int blockstart = bstart - start; int blocklen = end - bstart; // System.out.println("Start len " + blockstart + " " + blocklen); //for (int jj = blockstart; jj < blockstart + blocklen;jj++) { blocks.put(new Integer(blockstart), new Integer(blocklen)); // } } return blocks; } }