1 package jalview.analysis;
\r
3 import jalview.datamodel.*;
\r
5 import jalview.util.*;
\r
9 public class AlignmentUtil {
\r
// Private constructor: code outside this class cannot instantiate it.
// NOTE(review): the lines that follow the opening brace are not present in
// this listing, so the constructor body (if any) cannot be described here.
11 private AlignmentUtil() {
\r
14 public static int[][] percentIdentity2(AlignmentI align) {
\r
15 return percentIdentity2(align,0,align.getWidth()-1);
\r
/**
 * Allocates an int table with one row per alignment column
 * (align.getWidth()) and 24 entries per row, then walks every cell in
 * nested loops (outer over the 24 entries, inner over the columns).
 *
 * NOTE(review): the statement inside the inner loop, the closing braces and
 * the return statement are not present in this listing. Neither start nor
 * end is referenced in the lines that are visible - confirm against the
 * complete source whether the range is actually honoured.
 *
 * @param align alignment whose width sizes the table
 * @param start first column of the requested range (unused in the visible lines)
 * @param end   last column of the requested range (unused in the visible lines)
 * @return presumably the cons2 table - TODO confirm, the return is not visible
 */
18 public static int[][] percentIdentity2(AlignmentI align, int start, int end) {
\r
19 int [][] cons2 = new int[align.getWidth()][24];
\r
20 // Initialize the array
\r
// Note: a freshly allocated Java int array is already zero-filled.
21 for (int j=0;j<24;j++) {
\r
22 for (int i=0; i < align.getWidth();i++) {
\r
/**
 * Returns an int computed from two ints and a character height.
 *
 * NOTE(review): only the signature survives in this listing; the body is
 * missing, so the meaning of i and j and the formula used cannot be stated
 * here. Presumably i and j delimit a range of rows that is scaled by
 * charHeight - confirm against the complete source.
 */
29 public static int getPixelHeight(int i, int j,int charHeight) {
\r
/**
 * Encodes each sequence of the alignment as integers over a window of
 * (end - start + 1) positions, then, for pairs of sequences (j, k), counts
 * how often each code in sequence k coincides with each code in sequence j,
 * prints the raw and normalised 4x4 tables plus per-code frequencies to
 * System.out, and adds a 4x4 double matrix to the rates Vector.
 *
 * NOTE(review): a number of lines are missing from this listing (the first
 * branch of the character test, the values stored in seqint, the declaration
 * and accumulation of fulltot, every closing brace and the return statement).
 * Comments below describe only what the visible lines establish.
 *
 * @param align alignment supplying the sequences (getHeight / getSequenceAt)
 * @param start first position of the window
 * @param end   last position of the window; the window length is end - start + 1
 * @return presumably the rates Vector of double[4][4] - TODO confirm, the
 *         return statement is not visible
 */
39 public static Vector substitution_rates (AlignmentI align, int start, int end) {
\r
41 Vector rates = new Vector();
\r
43 int len = (end-start+1);
\r
45 // Turn seqs into char arrays
\r
// One row per sequence, one int code per window position.
47 int[][] seqint = new int[align.getHeight()][len];
\r
50 for (int j = 0; j < align.getHeight(); j++) {
\r
52 SequenceI seq = align.getSequenceAt(j);
\r
54 for (int i = 0 ; i < len; i++) {
\r
// NOTE(review): the index is start + i - 1, so start == 0 with i == 0 asks
// for position -1. Confirm whether start is meant to be 1-based here.
55 char c = seq.getCharAt(start + i - 1);
\r
// 'C', 'T' and 'G' are tested in this order after a first branch that is not
// visible; the codes assigned are on missing lines. Presumably they are 0..3
// with a negative value for anything else, given the ">= 0" and "!= -1"
// tests further down - TODO confirm.
59 } else if (c == 'C') {
\r
61 } else if (c == 'T') {
\r
63 } else if (c == 'G') {
\r
74 // print_matrix(seqint,2,len); for (int j = 0; j < align.getHeight(); j++) {
\r
// Both loops run over every sequence index, so j == k pairs are visited too.
76 for (int j = 0; j < align.getHeight(); j++) {
\r
78 for (int k = 0; k < align.getHeight(); k++) {
\r
// counts is indexed [code in sequence k][code in sequence j].
80 int counts[][] = new int[4][4];
\r
// tots is indexed by the code in sequence j; used as the divisor below.
82 int tots[] = new int[4];
\r
// fulltots is indexed by the code in sequence j and feeds the frequency and
// GC output below.
84 int fulltots[] = new int[4];
\r
86 for (int i = 0 ; i < len; i++) {
\r
90 // System.out.println("Seq " + j + " " + k + " " + i + " " + seqint[j][i] + " " + seqint[k][i]);
\r
91 if (seqint[j][i] >= 0 &&
\r
92 seqint[k][i] >= 0) {
\r
93 counts[seqint[k][i]][seqint[j][i]]++;
\r
95 // print_matrix(counts,4,4);
\r
96 tots[seqint[j][i]]++;
\r
99 if (seqint[j][i] != -1) {
\r
100 fulltots[seqint[j][i]]++;
\r
108 System.out.println();
\r
110 System.out.println("Sequence " + align.getSequenceAt(j).getName() + " " + align.getSequenceAt(k).getName());
\r
112 System.out.println();
\r
113 print_matrix(counts,4,4);
\r
114 System.out.println();
\r
118 double[][] out = new double[4][4];// = constant_multiply_matrix(counts,1.0/tot,4,4);
\r
// Normalise each column jj by tots[jj]. The division is carried out in
// double, so a zero total produces NaN or Infinity instead of throwing.
120 for (int i = 0; i < 4; i++) {
\r
121 for (int jj = 0; jj < 4; jj++) {
\r
122 out[i][jj] = (double)counts[i][jj]/tots[jj];
\r
126 print_matrix(out,4,4);
\r
127 System.out.println();
\r
// Single tab-separated "RATES" record: both sequence names, then the 16
// normalised values in row-major order.
130 System.out.print("RATES\t");
\r
131 System.out.print(align.getSequenceAt(j).getName() + "\t" + align.getSequenceAt(k).getName() + "\t");
\r
133 for (int i = 0; i < 4; i++) {
\r
134 for (int jj = 0; jj < 4; jj++) {
\r
135 Format.print(System.out,"%4.3f\t",out[i][jj]);
\r
138 System.out.println();
\r
// fulltot is declared and accumulated on lines missing from this listing.
140 for (int i = 0; i < 4; i++) {
\r
141 Format.print(System.out,"%4.3f\t",(double)fulltots[i]*1.0/fulltot);
\r
144 System.out.println();
\r
145 System.out.print("\nGC ");
\r
// NOTE(review): 100*fulltots[1]+fulltots[3] multiplies only fulltots[1] by
// 100. If a percentage of the two codes combined is intended this probably
// wants 100*(fulltots[1]+fulltots[3]) - confirm before relying on the value.
147 Format.print(System.out,"%4.3f\t",(double)(100*fulltots[1]+fulltots[3])/fulltot);
\r
149 System.out.print((fulltots[1]+fulltots[3]) + "\t" + fulltot);
\r
152 System.out.println();
\r
// The normalised matrix is appended to the result Vector.
154 rates.addElement(out);
\r
162 public static double[][] constant_multiply_matrix(int[][] matrix, double c,int n, int m) {
\r
163 double[][] out = new double[n][m];
\r
165 for (int i = 0; i < n; i++) {
\r
166 for (int j = 0; j < m; j++) {
\r
168 out[i][j] = matrix[i][j]*c;
\r
175 public static void print_matrix(int[][] matrix, int n, int m) {
\r
178 for (int i = 0; i < n; i++) {
\r
179 for (int j = 0; j < m; j++) {
\r
181 System.out.print(matrix[i][j] + "\t");
\r
183 System.out.println();
\r
186 public static void print_matrix(double[][] matrix, int n, int m) {
\r
189 for (int i = 0; i < n; i++) {
\r
190 for (int j = 0; j < m; j++) {
\r
192 Format.print(System.out,"%4.3f\t",matrix[i][j]);
\r
195 System.out.println();
\r
/**
 * Looks for occurrences of each k-mer inside the start..end stretch of a
 * sequence and records them in a Hashtable keyed by match offset, with the
 * k-mer's length as the value.
 *
 * NOTE(review): the declaration and advancement of the search offset i, the
 * closing braces and the return statement are missing from this listing.
 *
 * @param seq   sequence to search
 * @param start passed through to seq.getSequence(start, end)
 * @param end   passed through to seq.getSequence(start, end); nothing is
 *              searched for a k-mer unless end < seq.getLength()
 * @param kmers Vector whose elements are cast to Sequence
 * @return presumably the pos table - TODO confirm, the return is not visible
 */
199 public static Hashtable findKmers(SequenceI seq, int start, int end, Vector kmers) {
\r
201 Hashtable pos = new Hashtable();
\r
203 for (int j = 0; j < kmers.size(); j++) {
\r
// The same Vector element is read twice: once for its residue string and
// once as the Sequence object whose length is stored below.
205 String kmer = ((Sequence)kmers.elementAt(j)).getSequence();
\r
206 Sequence kmerseq = (Sequence)kmers.elementAt(j);
\r
208 if (end < seq.getLength()) {
\r
210 String str = seq.getSequence(start,end);
\r
// Each iteration searches str from offset i; indexOf is evaluated once in
// the loop condition and again to obtain coord.
214 while (str.indexOf(kmer,i) != -1) {
\r
216 // System.out.println("STring " + str + " " + i);
\r
217 int coord = str.indexOf(kmer,i);
\r
// coord is an offset into str (the extracted substring), not into seq. A
// later k-mer matching at the same offset replaces the earlier entry.
219 pos.put(new Integer(coord),new Integer(kmerseq.getLength()));
\r
// Thread subclass declared with default (package) access.
// NOTE(review): only this header line survives in the listing; the class body
// and whether the declaration is nested inside another class are not visible.
229 class FeatureThread extends Thread
\r
/**
 * Fetches UniProt records for the sequences of an alignment through
 * EBIFetchClient, in batches of at most 50 names, and scans the returned
 * lines for entry names, PDB accessions and feature elements, attaching what
 * it finds to the matching sequence.
 *
 * NOTE(review): many lines are missing from this listing (the declaration of
 * seqIndex, every brace, the body of the "<location>" scan, the else keyword
 * between the two position-parsing branches, and most of the SequenceFeature
 * construction). Comments below cover only what the visible lines establish.
 *
 * @param align alignment whose sequences are looked up by name and updated
 */
234 public static void addUniprotFeatures(AlignmentI align)
\r
236 EBIFetchClient ebi = new EBIFetchClient();
\r
238 Vector sequences = align.getSequences();
\r
239 SequenceI sequence;
\r
241 while (seqIndex < sequences.size())
\r
// Build one query string: "uniprot:" followed by up to 50 sequence names,
// each terminated by ';'. seqIndex advances across batches.
243 StringBuffer ids = new StringBuffer("uniprot:");
\r
244 for (int i=0; seqIndex<sequences.size() && i<50; seqIndex++, i++)
\r
246 sequence = (SequenceI) sequences.get(seqIndex);
\r
247 ids.append(sequence.getName() + ";");
\r
// NOTE(review): no check of the fetch result is visible before
// result.length is read below - confirm how a failed fetch is reported.
250 String[] result = ebi.fetchData(ids.toString(), "xml", null);
\r
252 Vector features = null;
\r
253 String type, description, status, start, end, pdb = null;
\r
255 for (int r = 0; r < result.length; r++)
\r
// Start of a new entry: resolve the sequence by the text of the <name>
// element and reset the per-entry state.
// NOTE(review): sequence was last assigned in the batching loop above, so
// this test only fires if a line missing from this listing sets it to null.
257 if(sequence==null && result[r].indexOf("<name>")>-1)
\r
259 sequence = align.findName( parseElement( result[r], "<name>" )) ;
\r
260 features = new Vector();
\r
261 type=""; start="0"; end="0"; description=""; status=""; pdb="";
\r
// PDB cross-reference: copy the value attribute onto the sequence.
267 if( result[r].indexOf("<property type=\"pdb accession\"")>-1)
\r
269 pdb = parseValue( result[r], "value=" );
\r
270 sequence.setPDBId(pdb);
\r
// Feature element: read its attributes, then move on to its <location>.
273 if(result[r].indexOf("feature type")>-1)
\r
275 type = parseValue( result[r], "type=" );
\r
276 description = parseValue( result[r], "description=" );
\r
277 status = parseValue ( result[r], "status=");
\r
// NOTE(review): the loop body is not visible (presumably it advances r); no
// bound on r against result.length is visible here - confirm.
279 while( result[r].indexOf("<location>")==-1)
\r
// A line containing "begin" supplies the start position, and the line after
// it (note the ++r) supplies the end position.
284 if(result[r].indexOf("begin")>-1)
\r
286 start = parseValue( result[r], "position=" );
\r
287 end = parseValue( result[++r], "position=" );
\r
// Otherwise (presumably an else branch on a missing line) start and end are
// both read from the same line.
291 start = parseValue( result[r], "position=" );
\r
292 end = parseValue( result[r], "position=" );
\r
294 int sstart = Integer.parseInt(start);
\r
295 int eend = Integer.parseInt(end);
\r
// Only features lying entirely inside the sequence's start..end are kept.
297 if(sstart>=sequence.getStart() && eend<=sequence.getEnd())
\r
299 SequenceFeature sf = new SequenceFeature(type,
\r
// End of the entry: hand the collected features to the sequence.
308 if(result[r].indexOf("</entry>")>-1)
\r
310 sequence.setSequenceFeatures( features );
\r
/**
 * Extracts the text of a quoted attribute value from a line: finds tag,
 * skips one further character after it (the opening quote when tag ends in
 * '='), and returns everything up to the next double quote.
 *
 * NOTE(review): the statement executed when the tag is absent is missing
 * from this listing, so the value returned in that case cannot be stated.
 *
 * @param line text to search
 * @param tag  attribute prefix such as "type=" (as used by the caller)
 * @return the characters between the quotes that follow tag
 */
318 static String parseValue(String line, String tag)
\r
// index is the position just past the opening quote.
321 int index = line.indexOf(tag)+tag.length()+1;
\r
// indexOf yields -1 when tag is absent, which makes index equal tag.length();
// that is the "not found" case tested here.
// NOTE(review): a tag found at offset -1 is impossible, but this test would
// also be wrong if the arithmetic above ever changes - keep them in sync.
322 if(index==tag.length())
\r
// NOTE(review): the closing quote is searched from index+1, so an empty
// value (tag="") skips its own closing quote and picks up a later quote, or
// -1, which makes substring throw - confirm whether empty values can occur.
325 return line.substring( index, line.indexOf("\"", index+1) );
\r
329 static String parseElement(String line, String tag)
\r
331 int index = line.indexOf(tag)+tag.length();
\r
332 return line.substring( index, line.indexOf("</") ) ;
\r