X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAAFrequency.java;h=f409f8c59e50d43aa675c63aedda44a0e46799f8;hb=49b625d7c10b85425059a51706a2aca96deed955;hp=ad1fe4e9b7911137ac7fd5e086d47b135b0c9a7b;hpb=f24dacb1da56fccf05d684e2f4899facec2aecf7;p=jalview.git diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java index ad1fe4e..f409f8c 100755 --- a/src/jalview/analysis/AAFrequency.java +++ b/src/jalview/analysis/AAFrequency.java @@ -1,6 +1,6 @@ /* * Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle +* Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -24,10 +24,8 @@ import java.util.*; /** - * Takes in a vector of sequences and column start and column end - * and returns a vector of size (end-start+1). Each element of the - * vector contains a hashtable with the keys being residues and - * the values being the count of each residue in that column. + * Takes in a vector or array of sequences and column start and column end + * and returns a new Hashtable[] of size maxSeqLength, if Hashtable not supplied. * This class is used extensively in calculating alignment colourschemes * that depend on the amount of conservation in each alignment column. * @author $author$ @@ -35,105 +33,113 @@ import java.util.*; */ public class AAFrequency { - /** Takes in a vector of sequences and column start and column end - * and returns a vector of size (end-start+1). Each element of the - * vector contains a hashtable with the keys being residues and - * the values being the count of each residue in that column. - * This class is used extensively in calculating alignment colourschemes - * that depend on the amount of conservation in each alignment column. */ - public static final Vector calculate(Vector sequences, int start, int end) + //No need to store 1000s of strings which are not + //visible to the user. + public static final String MAXCOUNT = "C"; + public static final String MAXRESIDUE="R"; + public static final String PID_GAPS = "G"; + public static final String PID_NOGAPS="N"; + + public static final Hashtable [] calculate(Vector sequences, int start, int end) + { + SequenceI [] seqs = new SequenceI[sequences.size()]; + int width = 0; + for(int i=0; iwidth) + width = seqs[i].getLength(); + } + + Hashtable [] reply = new Hashtable[width]; + + calculate(seqs, start, end, reply); + + return reply; + } + +public static final void calculate(SequenceI[] sequences, + int start, int end, + Hashtable [] result) +{ + Hashtable residueHash; + int maxCount, nongap, i, j, v, jSize = sequences.length; + String maxResidue; + char c; + float percentage; + + int[] values = new int[132]; + + String seq; + + for (i = start; i < end; i++) + { + residueHash = new Hashtable(); + maxCount = 0; + maxResidue = ""; + nongap = 0; + values = new int[132]; + + for (j = 0; j < jSize; j++) { - Vector result = new Vector(); - Hashtable residueHash; - int count, maxCount, nongap, i, j, jSize = sequences.size(); - String maxResidue, sequence, res; - float percentage; + seq = sequences[j].getSequence(); + if (seq.length() > i) + { + c = seq.charAt(i); - for (i = start; i <= end; i++) + if(c == '.' || c==' ') + c = '-'; + + if(c=='-') + { + values['-']++; + continue; + } + else if ('a' <= c && c <= 'z') { - residueHash = new Hashtable(); - maxCount = 0; - maxResidue = "-"; - nongap = 0; - - for (j = 0; j < jSize; j++) - { - if (sequences.elementAt(j) instanceof Sequence) - { - sequence = ((Sequence) sequences.elementAt(j)).getSequence(); - - if (sequence.length() > i) - { - res = String.valueOf(Character.toUpperCase(sequence.charAt(i))); - - if (jalview.util.Comparison.isGap(res.charAt(0))) - { - res = "-"; // we always use this for gaps in the property vectors - } - else - { nongap++; } - - if (residueHash.containsKey(res)) - { - count = ((Integer) residueHash.get(res)).intValue(); - count++; - - if (!jalview.util.Comparison.isGap(res.charAt(0)) && - (count >= maxCount)) - { - if (count > maxCount) - { - maxResidue = res; - } - else if (maxResidue.indexOf(res) == -1) - { - maxResidue += res; - } - - maxCount = count; - } - - residueHash.put(res, new Integer(count)); - } - else - { - residueHash.put(res, new Integer(1)); - } - } - else - { - if (residueHash.containsKey("-")) - { - count = ((Integer) residueHash.get("-")).intValue(); - count++; - residueHash.put("-", new Integer(count)); - } - else - { - residueHash.put("-", new Integer(1)); - } - } - } - } - - residueHash.put("maxCount", new Integer(maxCount)); - residueHash.put("maxResidue", maxResidue); - - - //Size is redundant at present if we calculate percentage here - //residueHash.put("size", new Integer(jSize)); - //residueHash.put("nogaps", new Integer(nongap)); - - percentage = ((float)maxCount*100) / (float)jSize; - residueHash.put("pid_gaps", new Float(percentage) ); - - percentage = ((float)maxCount*100) / (float)nongap; - residueHash.put("pid_nogaps", new Float(percentage) ); - result.addElement(residueHash); + c -= 32 ;//('a' - 'A'); } + nongap++; + values[c]++; + } + else + { + values['-']++; + } + } - return result; + for (v = 'A'; v < 'Z'; v++) + { + if (values[v] < 2 || values[v] < maxCount) + continue; + + if (values[v] > maxCount) + { + maxResidue = String.valueOf( (char) v); + } + else if (values[v] == maxCount) + { + maxResidue += String.valueOf( (char) v); + } + maxCount = values[v]; } + + if(maxResidue.length()==0) + maxResidue = "-"; + + residueHash.put(MAXCOUNT, new Integer(maxCount)); + residueHash.put(MAXRESIDUE, maxResidue); + + percentage = ( (float) maxCount * 100) / (float) jSize; + residueHash.put(PID_GAPS, new Float(percentage)); + + percentage = ( (float) maxCount * 100) / (float) nongap; + residueHash.put(PID_NOGAPS, new Float(percentage)); + result[i] = residueHash; + } +} } + +