X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAAFrequency.java;h=f409f8c59e50d43aa675c63aedda44a0e46799f8;hb=49b625d7c10b85425059a51706a2aca96deed955;hp=15618de9beddc09413c338604d622d8041b3eef1;hpb=d69ea8f1997771890b44e4b332a7ca84fe6f0893;p=jalview.git diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java index 15618de..f409f8c 100755 --- a/src/jalview/analysis/AAFrequency.java +++ b/src/jalview/analysis/AAFrequency.java @@ -24,10 +24,8 @@ import java.util.*; /** - * Takes in a vector of sequences and column start and column end - * and returns a vector of size (end-start+1). Each element of the - * vector contains a hashtable with the keys being residues and - * the values being the count of each residue in that column. + * Takes in a vector or array of sequences and column start and column end + * and returns a new Hashtable[] of size maxSeqLength, if Hashtable not supplied. * This class is used extensively in calculating alignment colourschemes * that depend on the amount of conservation in each alignment column. * @author $author$ @@ -35,12 +33,12 @@ import java.util.*; */ public class AAFrequency { - /** Takes in a !!ARRAY!! of sequences and column start and column end - * and fills given Vector of size (end-start+1). Each element of the - * vector contains a hashtable with the keys being residues and - * the values being the count of each residue in that column. - * This class is used extensively in calculating alignment colourschemes - * that depend on the amount of conservation in each alignment column. */ + //No need to store 1000s of strings which are not + //visible to the user. + public static final String MAXCOUNT = "C"; + public static final String MAXRESIDUE="R"; + public static final String PID_GAPS = "G"; + public static final String PID_NOGAPS="N"; public static final Hashtable [] calculate(Vector sequences, int start, int end) { @@ -72,34 +70,37 @@ public static final void calculate(SequenceI[] sequences, int[] values = new int[132]; + String seq; + for (i = start; i < end; i++) { residueHash = new Hashtable(); maxCount = 0; - maxResidue = "-"; + maxResidue = ""; nongap = 0; values = new int[132]; for (j = 0; j < jSize; j++) { - if (sequences[j].getLength() > i) + seq = sequences[j].getSequence(); + if (seq.length() > i) { - c = sequences[j].getCharAt(i); + c = seq.charAt(i); - if ('a' <= c && c <= 'z') - { - c -= ('a' - 'A'); - } + if(c == '.' || c==' ') + c = '-'; - if (jalview.util.Comparison.isGap(c)) + if(c=='-') { - c = '-'; // we always use this for gaps in the property vectors + values['-']++; + continue; } - else + else if ('a' <= c && c <= 'z') { - nongap++; + c -= 32 ;//('a' - 'A'); } + nongap++; values[c]++; } @@ -111,7 +112,7 @@ public static final void calculate(SequenceI[] sequences, for (v = 'A'; v < 'Z'; v++) { - if (values[v] == 0 || values[v] < maxCount) + if (values[v] < 2 || values[v] < maxCount) continue; if (values[v] > maxCount) @@ -125,15 +126,17 @@ public static final void calculate(SequenceI[] sequences, maxCount = values[v]; } + if(maxResidue.length()==0) + maxResidue = "-"; - residueHash.put("maxCount", new Integer(maxCount)); - residueHash.put("maxResidue", maxResidue); + residueHash.put(MAXCOUNT, new Integer(maxCount)); + residueHash.put(MAXRESIDUE, maxResidue); percentage = ( (float) maxCount * 100) / (float) jSize; - residueHash.put("pid_gaps", new Float(percentage)); + residueHash.put(PID_GAPS, new Float(percentage)); percentage = ( (float) maxCount * 100) / (float) nongap; - residueHash.put("pid_nogaps", new Float(percentage)); + residueHash.put(PID_NOGAPS, new Float(percentage)); result[i] = residueHash; } }