package jalview.analysis; import jalview.jbgui.*; import jalview.datamodel.*; import jalview.io.*; import jalview.analysis.*; import java.awt.*; import java.applet.Applet; import java.util.*; import java.net.*; import java.io.*; public class AAFrequency { // Takes in a vector of sequences and column start and column end // and returns a vector of size (end-start+1). Each element of the // vector contains a hashtable with the keys being residues and // the values being the count of each residue in that column. // This class is used extensively in calculating alignment colourschemes // that depend on the amount of conservation in each alignment column. public static Vector calculate(Vector sequences,int start,int end) { Vector result = new Vector(); for (int i = start;i <= end; i++) { Hashtable residueHash = new Hashtable(); int maxCount = -1; String maxResidue = "-"; int nongap = 0; for (int j=0; j < sequences.size(); j++) { if (sequences.elementAt(j) instanceof Sequence) { Sequence s = (Sequence)sequences.elementAt(j); if (s.getSequence().length() > i) { String res = s.getSequence().substring(i,i+1); if (!res.equals("-")) nongap++; if (residueHash.containsKey(res)) { int count = ((Integer)residueHash.get(res)).intValue() ; count++; if (!res.equals("-") && count >= maxCount) { if(count>maxCount) maxResidue = res; else if(maxResidue.indexOf(res)==-1) maxResidue += res; maxCount = count; } residueHash.put(res,new Integer(count)); } else residueHash.put(res,new Integer(1)); } else { if (residueHash.containsKey("-")) { int count = ((Integer)residueHash.get("-")).intValue() ; count++; residueHash.put("-",new Integer(count)); } else residueHash.put("-",new Integer(1)); } } } residueHash.put("maxCount",new Integer(maxCount)); residueHash.put("maxResidue", maxResidue); residueHash.put("size", new Integer(sequences.size())); residueHash.put("nongap", new Integer(nongap)); result.addElement(residueHash); } return result; } public static Vector calculatePID(SequenceI refseq,Vector sequences,int window,int start,int end) { Vector result = new Vector(); boolean init = true; Vector prev = null; for (int i = start;i <= end; i++) { Vector values = new Vector(); result.addElement(values); // If start < window/2 then set value to zero. if (i< window/2 || i >= refseq.getSequence().length()-window/2) { for (int j = 0; j < sequences.size(); j++) { values.addElement(new Integer(0)); } } else if (init == true) { init = false; int winstart = i-window/2; int winend = i+window/2; if (window%2 != 0) { winend++; } for (int j = 0; j < sequences.size(); j++) { values.addElement(new Integer(0)); } for (int k = winstart; k <= winend; k++) { String refchar = refseq.getSequence().substring(k,k+1); for (int j = 0; j < sequences.size(); j++) { if (refchar.equals("-") == false) { Sequence s = (Sequence)sequences.elementAt(j); if (s.getSequence().length() > k) { String res = s.getSequence().substring(k,k+1); if (res.equals(refchar)) { int val = ((Integer)values.elementAt(j)).intValue(); val++; values.setElementAt(new Integer(val),j); } } } } } prev = values; } else { int winstart = i-window/2; int winend = i+window/2; if (window%2 != 0) { winend++; } // We need to take the previous set of values // subtract the pid at winstart-1 // and add the pid at winend; String pre_refchar = refseq.getSequence().substring(winstart-1,winstart); String pos_refchar = "-"; if (refseq.getSequence().length() > winend) { pos_refchar = refseq.getSequence().substring(winend,winend+1); } for (int j = 0; j < sequences.size(); j++) { // First copy the pid value from i-1 int val = ((Integer)prev.elementAt(j)).intValue(); Sequence s = (Sequence)sequences.elementAt(j); String pre_char = s.getSequence().substring(winstart-1,winstart); String pos_char = "-"; if (s.getSequence().length() > winend) { pos_char = s.getSequence().substring(winend,winend+1); } // Now substract 1 if the chars at winstart-1 match if (pre_refchar.equals("-") == false && pre_char.equals(pre_refchar)) { val--; } if (pos_refchar.equals("-") == false && pos_char.equals(pos_refchar)) { val++; } values.addElement(new Integer(val)); } prev = values; } } return result; } public static Hashtable findBlocks(Vector seqs, int start, int end,Vector exc) { // start and end are in real (not relative coords); // The coords in the hashtable that is returned are in relative coords // i.e. start from 0 Hashtable blocks = new Hashtable(); boolean prev = false; int bstart = -1; for (int i = start; i <= end ; i++) { SequenceI seq = (SequenceI)seqs.elementAt(0); char c = seq.getCharAt(i); boolean found = true; int j = 1; while (j < seqs.size() && found == true) { SequenceI jseq = (SequenceI)seqs.elementAt(j); if (!exc.contains(jseq)) { char cc = jseq.getCharAt(i); if ( cc != c) { found = false; } } j++; } if (prev == false && found == true) { bstart = i; } else if (prev == true && found == false && bstart != -1) { int blockstart = bstart-start; int blocklen = i-bstart; //System.out.println("Start len " + blockstart + " " + blocklen); for (int jj = blockstart; jj < blockstart + blocklen;jj++) { blocks.put(new Integer(jj),new Integer(blocklen)); } bstart = -1; } prev = found; } if (bstart != -1) { int blockstart = bstart-start; int blocklen = end-bstart; // System.out.println("Start len " + blockstart + " " + blocklen); for (int jj = blockstart; jj < blockstart + blocklen;jj++) { blocks.put(new Integer(blockstart),new Integer(blocklen)); } } return blocks; } public static Hashtable findKmerCount(SequenceI seq, int start, int end,int window, int step,Vector kmers) { int tmpstart = start; Hashtable vals = new Hashtable(); while (tmpstart <= end) { String tmpstr = seq.getSequence().substring(tmpstart-window/2,tmpstart+window/2); int count = 0; //System.out.println("Str " + tmpstr); for (int ii = 0; ii < kmers.size(); ii++) { String kmer = ((SequenceI)kmers.elementAt(ii)).getSequence(); int i = -1; while (tmpstr.indexOf(kmer,i) != -1) { i = tmpstr.indexOf(kmer,i); i++; count++; } ii++; } vals.put(new Integer(tmpstart),new Integer(count)); tmpstart += step; } return vals; } public static Hashtable findBlockStarts(Vector seqs, int start, int end,Vector exc) { // start and end are in real (not relative coords); // The coords in the hashtable that is returned are in relative coords // i.e. start from 0 Hashtable blocks = new Hashtable(); boolean prev = false; int bstart = -1; for (int i = start; i <= end ; i++) { SequenceI seq = (SequenceI)seqs.elementAt(0); char c = seq.getCharAt(i); boolean found = true; int j = 1; while (j < seqs.size() && found == true) { SequenceI jseq = (SequenceI)seqs.elementAt(j); if (!exc.contains(jseq)) { char cc = jseq.getCharAt(i); if ( cc != c) { found = false; } } j++; } if (prev == false && found == true) { bstart = i; } else if (prev == true && found == false && bstart != -1) { int blockstart = bstart-start; int blocklen = i-bstart; // System.out.println("Start len " + blockstart + " " + blocklen); //for (int jj = blockstart; jj < blockstart + blocklen;jj++) { blocks.put(new Integer(blockstart),new Integer(blocklen)); // } bstart = -1; } prev = found; } if (bstart != -1) { int blockstart = bstart-start; int blocklen = end-bstart; // System.out.println("Start len " + blockstart + " " + blocklen); //for (int jj = blockstart; jj < blockstart + blocklen;jj++) { blocks.put(new Integer(blockstart),new Integer(blocklen)); // } } return blocks; } }