src/jalview/analysis/CrossRef.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.analysis;
  22
  23 import jalview.datamodel.AlignedCodonFrame;
  24 import jalview.datamodel.Alignment;
  25 import jalview.datamodel.AlignmentI;
  26 import jalview.datamodel.DBRefEntry;
  27 import jalview.datamodel.DBRefSource;
  28 import jalview.datamodel.Mapping;
  29 import jalview.datamodel.Sequence;
  30 import jalview.datamodel.SequenceFeature;
  31 import jalview.datamodel.SequenceI;
  32 import jalview.io.gff.SequenceOntology;
  33 import jalview.schemes.ResidueProperties;
  34 import jalview.util.DBRefUtils;
  35 import jalview.util.MapList;
  36 import jalview.util.MappingUtils;
  37 import jalview.util.StringUtils;
  38 import jalview.ws.SequenceFetcher;
  39 import jalview.ws.seqfetcher.ASequenceFetcher;
  40
  41 import java.util.ArrayList;
  42 import java.util.Collections;
  43 import java.util.LinkedHashMap;
  44 import java.util.List;
  45 import java.util.Map.Entry;
  46 import java.util.Vector;
  47
  48 /**
 * Functions for cross-referencing sequence databases. Callers must state
 * whether they are cross-referencing from DNA (dna == true) or from protein.
  51  *
  52  * @author JimP
  53  *
  54  */
  55 public class CrossRef
  56 {
  57   /**
  58    * Select just the DNA or protein references for a protein or dna sequence
  59    *
  60    * @param fromDna
  61    *          if true, select references from DNA (i.e. Protein databases), else
  62    *          DNA database references
  63    * @param refs
  64    *          a set of references to select from
  65    * @return
  66    */
  67   public static DBRefEntry[] findXDbRefs(boolean fromDna, DBRefEntry[] refs)
  68   {
  69     return DBRefUtils.selectRefs(refs, fromDna ? DBRefSource.PROTEINDBS
  70             : DBRefSource.DNACODINGDBS);
  71     // could attempt to find other cross
  72     // refs here - ie PDB xrefs
  73     // (not dna, not protein seq)
  74   }
  75
  76   /**
  77    * @param dna
  78    *          true if seqs are DNA seqs
  79    * @param seqs
  80    * @return a list of sequence database cross reference source types
  81    */
  82   public static String[] findSequenceXrefTypes(boolean dna, SequenceI[] seqs)
  83   {
  84     return findSequenceXrefTypes(dna, seqs, null);
  85   }
  86
  87   /**
  88    * Indirect references are references from other sequences from the dataset to
  89    * any of the direct DBRefEntrys on the given sequences.
  90    *
  91    * @param dna
  92    *          true if seqs are DNA seqs
  93    * @param seqs
  94    * @return a list of sequence database cross reference source types
  95    */
  96   public static String[] findSequenceXrefTypes(boolean dna,
  97           SequenceI[] seqs, AlignmentI dataset)
  98   {
  99     String[] dbrefs = null;
 100     List<String> refs = new ArrayList<String>();
 101     for (SequenceI seq : seqs)
 102     {
 103       if (seq != null)
 104       {
 105         SequenceI dss = seq;
 106         while (dss.getDatasetSequence() != null)
 107         {
 108           dss = dss.getDatasetSequence();
 109         }
 110         DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRef());
 111         if (rfs != null)
 112         {
 113           for (DBRefEntry ref : rfs)
 114           {
 115             if (!refs.contains(ref.getSource()))
 116             {
 117               refs.add(ref.getSource());
 118             }
 119           }
 120         }
 121         if (dataset != null)
 122         {
 123           // search for references to this sequence's direct references.
 124           DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRef());
 125           List<SequenceI> rseqs = new ArrayList<SequenceI>();
 126           CrossRef.searchDatasetXrefs(seq, !dna, lrfs, dataset, rseqs,
 127                   null); // don't need to specify codon frame for mapping here
 128           for (SequenceI rs : rseqs)
 129           {
 130             DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef());
 131             if (xrs != null)
 132             {
 133               for (DBRefEntry ref : xrs)
 134               {
 135                 if (!refs.contains(ref.getSource()))
 136                 {
 137                   refs.add(ref.getSource());
 138                 }
 139               }
 140             }
 141             // looks like copy and paste - change rfs to xrs?
 142             // for (int r = 0; rfs != null && r < rfs.length; r++)
 143             // {
 144             // if (!refs.contains(rfs[r].getSource()))
 145             // {
 146             // refs.add(rfs[r].getSource());
 147             // }
 148             // }
 149           }
 150         }
 151       }
 152     }
 153     if (refs.size() > 0)
 154     {
 155       dbrefs = new String[refs.size()];
 156       refs.toArray(dbrefs);
 157     }
 158     return dbrefs;
 159   }
 160
 161   public static boolean hasCdnaMap(SequenceI[] seqs)
 162   {
 163     // TODO unused - remove?
 164     String[] reftypes = findSequenceXrefTypes(false, seqs);
 165     for (int s = 0; s < reftypes.length; s++)
 166     {
 167       if (reftypes.equals(DBRefSource.EMBLCDS))
 168       {
 169         return true;
 170         // no map
 171       }
 172     }
 173     return false;
 174   }
 175
 176   public static SequenceI[] getCdnaMap(SequenceI[] seqs)
 177   {
 178     // TODO unused - remove?
 179     Vector cseqs = new Vector();
 180     for (int s = 0; s < seqs.length; s++)
 181     {
 182       DBRefEntry[] cdna = findXDbRefs(true, seqs[s].getDBRef());
 183       for (int c = 0; c < cdna.length; c++)
 184       {
 185         if (cdna[c].getSource().equals(DBRefSource.EMBLCDS))
 186         {
 187           System.err
 188                   .println("TODO: unimplemented sequence retrieval for coding region sequence.");
 189           // TODO: retrieve CDS dataset sequences
 190           // need global dataset sequence retriever/resolver to reuse refs
 191           // and construct Mapping entry.
 192           // insert gaps in CDS according to peptide gaps.
 193           // add gapped sequence to cseqs
 194         }
 195       }
 196     }
 197     if (cseqs.size() > 0)
 198     {
 199       SequenceI[] rsqs = new SequenceI[cseqs.size()];
 200       cseqs.copyInto(rsqs);
 201       return rsqs;
 202     }
 203     return null;
 204
 205   }
 206
 207   /**
 208    *
 209    * @param dna
 210    * @param seqs
 211    * @return
 212    */
 213   public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna,
 214           String source)
 215   {
 216     return findXrefSequences(seqs, dna, source, null);
 217   }
 218
 219   /**
 220    *
 221    * @param seqs
 222    *          sequences whose xrefs are being retrieved
 223    * @param dna
 224    *          true if sequences are nucleotide
 225    * @param source
 226    * @param dataset
 227    *          alignment to search for product sequences.
 228    * @return products (as dataset sequences)
 229    */
 230   public static Alignment findXrefSequences(SequenceI[] seqs, boolean dna,
 231           String source, AlignmentI dataset)
 232   {
 233     List<SequenceI> rseqs = new ArrayList<SequenceI>();
 234     AlignedCodonFrame cf = new AlignedCodonFrame();
 235     for (SequenceI seq : seqs)
 236     {
 237       SequenceI dss = seq;
 238       while (dss.getDatasetSequence() != null)
 239       {
 240         dss = dss.getDatasetSequence();
 241       }
 242       boolean found = false;
 243       DBRefEntry[] xrfs = CrossRef.findXDbRefs(dna, dss.getDBRef());
 244       if ((xrfs == null || xrfs.length == 0) && dataset != null)
 245       {
 246         System.out.println("Attempting to find ds Xrefs refs.");
 247         // FIXME should be dss not seq here?
 248         DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRef());
 249         // less ambiguous would be a 'find primary dbRefEntry' method.
 250         // filter for desired source xref here
 251         found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset,
 252                 rseqs, cf);
 253       }
 254       for (int r = 0; xrfs != null && r < xrfs.length; r++)
 255       {
 256         DBRefEntry xref = xrfs[r];
 257         if (source != null && !source.equals(xref.getSource()))
 258         {
 259           continue;
 260         }
 261         if (xref.hasMap())
 262         {
 263           if (xref.getMap().getTo() != null)
 264           {
 265             SequenceI rsq = new Sequence(xref.getMap().getTo());
 266             rseqs.add(rsq);
 267             if (xref.getMap().getMap().getFromRatio() != xref
 268                     .getMap().getMap().getToRatio())
 269             {
 270               // get sense of map correct for adding to product alignment.
 271               if (dna)
 272               {
 273                 // map is from dna seq to a protein product
 274                 cf.addMap(dss, rsq, xref.getMap().getMap());
 275               }
 276               else
 277               {
 278                 // map should be from protein seq to its coding dna
 279                 cf.addMap(rsq, dss, xref.getMap().getMap().getInverse());
 280               }
 281
 282               /*
 283                * compute peptide variants from dna variants
 284                */
 285               rsq.createDatasetSequence();
 286               computeProteinVariants(seq, rsq, xref.getMap().getMap());
 287             }
 288             found = true;
 289           }
 290         }
 291         if (!found)
 292         {
 293           // do a bit more work - search for sequences with references matching
 294           // xrefs on this sequence.
 295           if (dataset != null)
 296           {
 297             found |= searchDataset(dss, xref, dataset, rseqs, cf); // ,false,!dna);
 298             if (found)
 299             {
 300               xrfs[r] = null; // we've recovered seqs for this one.
 301             }
 302           }
 303         }
 304       }
 305       if (!found)
 306       {
 307         if (xrfs != null && xrfs.length > 0)
 308         {
 309           // Try and get the sequence reference...
 310           /*
 311            * Ideal world - we ask for a sequence fetcher implementation here if
 312            * (jalview.io.RunTimeEnvironment.getSequenceFetcher()) (
 313            */
 314           ASequenceFetcher sftch = new SequenceFetcher();
 315           SequenceI[] retrieved = null;
 316           int l = xrfs.length;
 317           for (int r = 0; r < xrfs.length; r++)
 318           {
 319             // filter out any irrelevant or irretrievable references
 320             if (xrfs[r] == null
 321                     || ((source != null && !source.equals(xrfs[r]
 322                             .getSource())) || !sftch.isFetchable(xrfs[r]
 323                             .getSource())))
 324             {
 325               l--;
 326               xrfs[r] = null;
 327             }
 328           }
 329           if (l > 0)
 330           {
 331             System.out
 332                     .println("Attempting to retrieve cross referenced sequences.");
 333             DBRefEntry[] t = new DBRefEntry[l];
 334             l = 0;
 335             for (int r = 0; r < xrfs.length; r++)
 336             {
 337               if (xrfs[r] != null)
 338               {
 339                 t[l++] = xrfs[r];
 340               }
 341             }
 342             xrfs = t;
 343             try
 344             {
 345               retrieved = sftch.getSequences(xrfs, !dna);
 346               // problem here is we don't know which of xrfs resulted in which
 347               // retrieved element
 348             } catch (Exception e)
 349             {
 350               System.err
 351                       .println("Problem whilst retrieving cross references for Sequence : "
 352                               + seq.getName());
 353               e.printStackTrace();
 354             }
 355             if (retrieved != null)
 356             {
 357               for (int rs = 0; rs < retrieved.length; rs++)
 358               {
 359                 // TODO: examine each sequence for 'redundancy'
 360                 DBRefEntry[] dbr = retrieved[rs].getDBRef();
 361                 if (dbr != null && dbr.length > 0)
 362                 {
 363                   for (int di = 0; di < dbr.length; di++)
 364                   {
 365                     // find any entry where we should put in the sequence being
 366                     // cross-referenced into the map
 367                     Mapping map = dbr[di].getMap();
 368                     if (map != null)
 369                     {
 370                       if (map.getTo() != null && map.getMap() != null)
 371                       {
 372                         // should search the local dataset to find any existing
 373                         // candidates for To !
 374                         try
 375                         {
 376                           // compare ms with dss and replace with dss in mapping
 377                           // if map is congruent
 378                           SequenceI ms = map.getTo();
 379                           int sf = map.getMap().getToLowest();
 380                           int st = map.getMap().getToHighest();
 381                           SequenceI mappedrg = ms.getSubSequence(sf, st);
 382                           SequenceI loc = dss.getSubSequence(sf, st);
 383                           if (mappedrg.getLength() > 0
 384                                   && mappedrg.getSequenceAsString().equals(
 385                                           loc.getSequenceAsString()))
 386                           {
 387                             System.err
 388                                     .println("Mapping updated for retrieved crossreference");
 389                             // method to update all refs of existing To on
 390                             // retrieved sequence with dss and merge any props
 391                             // on To onto dss.
 392                             map.setTo(dss);
 393                           }
 394                         } catch (Exception e)
 395                         {
 396                           System.err
 397                                   .println("Exception when consolidating Mapped sequence set...");
 398                           e.printStackTrace(System.err);
 399                         }
 400                       }
 401                     }
 402                   }
 403                 }
 404                 retrieved[rs].updatePDBIds();
 405                 rseqs.add(retrieved[rs]);
 406               }
 407             }
 408           }
 409         }
 410       }
 411     }
 412
 413     Alignment ral = null;
 414     if (rseqs.size() > 0)
 415     {
 416       SequenceI[] rsqs = new SequenceI[rseqs.size()];
 417       rseqs.toArray(rsqs);
 418       ral = new Alignment(rsqs);
 419       if (cf != null && !cf.isEmpty())
 420       {
 421         ral.addCodonFrame(cf);
 422       }
 423     }
 424     return ral;
 425   }
 426
 427   /**
 428    * find references to lrfs in the cross-reference set of each sequence in
 429    * dataset (that is not equal to sequenceI) Identifies matching DBRefEntry
 430    * based on source and accession string only - Map and Version are nulled.
 431    *
 432    * @param sequenceI
 433    * @param lrfs
 434    * @param dataset
 435    * @param rseqs
 436    * @return true if matches were found.
 437    */
 438   private static boolean searchDatasetXrefs(SequenceI sequenceI,
 439           boolean dna, DBRefEntry[] lrfs, AlignmentI dataset,
 440           List<SequenceI> rseqs, AlignedCodonFrame cf)
 441   {
 442     boolean found = false;
 443     if (lrfs == null)
 444     {
 445       return false;
 446     }
 447     for (int i = 0; i < lrfs.length; i++)
 448     {
 449       DBRefEntry xref = new DBRefEntry(lrfs[i]);
 450       // add in wildcards
 451       xref.setVersion(null);
 452       xref.setMap(null);
 453       found = searchDataset(sequenceI, xref, dataset, rseqs, cf, false, dna);
 454     }
 455     return found;
 456   }
 457
 458   /**
 459    * search a given sequence dataset for references matching cross-references to
 460    * the given sequence
 461    *
 462    * @param sequenceI
 463    * @param xrf
 464    * @param dataset
 465    * @param rseqs
 466    *          set of unique sequences
 467    * @param cf
 468    * @return true if one or more unique sequences were found and added
 469    */
 470   public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,
 471           AlignmentI dataset, List<SequenceI> rseqs, AlignedCodonFrame cf)
 472   {
 473     return searchDataset(sequenceI, xrf, dataset, rseqs, cf, true, false);
 474   }
 475
 476   /**
 477    * TODO: generalise to different protein classifications Search dataset for
 478    * DBRefEntrys matching the given one (xrf) and add the associated sequence to
 479    * rseq.
 480    *
 481    * @param sequenceI
 482    * @param xrf
 483    * @param dataset
 484    * @param rseqs
 485    * @param direct
 486    *          - search all references or only subset
 487    * @param dna
 488    *          search dna or protein xrefs (if direct=false)
 489    * @return true if relationship found and sequence added.
 490    */
 491   public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf,
 492           AlignmentI dataset, List<SequenceI> rseqs, AlignedCodonFrame cf,
 493           boolean direct, boolean dna)
 494   {
 495     boolean found = false;
 496     SequenceI[] typer = new SequenceI[1];
 497     if (dataset == null)
 498     {
 499       return false;
 500     }
 501     if (dataset.getSequences() == null)
 502     {
 503       System.err.println("Empty dataset sequence set - NO VECTOR");
 504       return false;
 505     }
 506     List<SequenceI> ds;
 507     synchronized (ds = dataset.getSequences())
 508     {
 509       for (SequenceI nxt : ds)
 510       {
 511         if (nxt != null)
 512         {
 513           if (nxt.getDatasetSequence() != null)
 514           {
 515             System.err
 516                     .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!");
 517           }
 518           if (nxt != sequenceI && nxt != sequenceI.getDatasetSequence())
 519           {
 520             // check if this is the correct sequence type
 521             {
 522               typer[0] = nxt;
 523               boolean isDna = jalview.util.Comparison.isNucleotide(typer);
 524               if ((direct && isDna == dna) || (!direct && isDna != dna))
 525               {
 526                 // skip this sequence because it is same molecule type
 527                 continue;
 528               }
 529             }
 530
 531             // look for direct or indirect references in common
 532             DBRefEntry[] poss = nxt.getDBRef(), cands = null;
 533             if (direct)
 534             {
 535               cands = jalview.util.DBRefUtils.searchRefs(poss, xrf);
 536             }
 537             else
 538             {
 539               poss = CrossRef.findXDbRefs(dna, poss); //
 540               cands = jalview.util.DBRefUtils.searchRefs(poss, xrf);
 541             }
 542             if (cands != null)
 543             {
 544               if (!rseqs.contains(nxt))
 545               {
 546                 rseqs.add(nxt);
 547                 boolean foundmap = cf != null;
 548                 // don't search if we aren't given a codon map object
 549                 for (int r = 0; foundmap && r < cands.length; r++)
 550                 {
 551                   if (cands[r].hasMap())
 552                   {
 553                     if (cands[r].getMap().getTo() != null
 554                             && cands[r].getMap().getMap().getFromRatio() != cands[r]
 555                                     .getMap().getMap().getToRatio())
 556                     {
 557                       foundmap = true;
 558                       // get sense of map correct for adding to product
 559                       // alignment.
 560                       if (dna)
 561                       {
 562                         // map is from dna seq to a protein product
 563                         cf.addMap(sequenceI, nxt, cands[r].getMap()
 564                                 .getMap());
 565                       }
 566                       else
 567                       {
 568                         // map should be from protein seq to its coding dna
 569                         cf.addMap(nxt, sequenceI, cands[r].getMap()
 570                                 .getMap().getInverse());
 571                       }
 572                     }
 573                   }
 574                 }
 575                 // TODO: add mapping between sequences if necessary
 576                 found = true;
 577               }
 578             }
 579
 580           }
 581         }
 582       }
 583     }
 584     return found;
 585   }
 586
 587   /**
 588    * Computes variants in peptide product generated by variants in dna, and adds
 589    * them as sequence_variant features on the protein sequence. Returns the
 590    * number of variant features added.
 591    *
 592    * @param dnaSeq
 593    * @param peptide
 594    * @param dnaToProtein
 595    */
 596   protected static int computeProteinVariants(SequenceI dnaSeq,
 597           SequenceI peptide, MapList dnaToProtein)
 598   {
 599     /*
 600      * map from peptide position to all variant features of the codon for it
 601      * LinkedHashMap ensures we add the peptide features in sequence order
 602      */
 603     LinkedHashMap<Integer, String[][]> variants = new LinkedHashMap<Integer, String[][]>();
 604     SequenceOntology so = SequenceOntology.getInstance();
 605
 606     SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures();
 607     if (dnaFeatures == null)
 608     {
 609       return 0;
 610     }
 611
 612     int[] lastCodon = null;
 613     int lastPeptidePostion = 0;
 614
 615     /*
 616      * build a map of codon variations for peptides
 617      */
 618     for (SequenceFeature sf : dnaFeatures)
 619     {
 620       int dnaCol = sf.getBegin();
 621       if (dnaCol != sf.getEnd())
 622       {
 623         // not handling multi-locus variant features
 624         continue;
 625       }
 626       if (so.isSequenceVariant(sf.getType()))
 627       {
 628         int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
 629         if (mapsTo == null)
 630         {
 631           // feature doesn't lie within coding region
 632           continue;
 633         }
 634         int peptidePosition = mapsTo[0];
 635         String[][] codonVariants = variants.get(peptidePosition);
 636         if (codonVariants == null)
 637         {
 638           codonVariants = new String[3][];
 639           variants.put(peptidePosition, codonVariants);
 640         }
 641
 642         /*
 643          * extract dna variants to a string array
 644          */
 645         String alls = (String) sf.getValue("alleles");
 646         if (alls == null)
 647         {
 648           continue;
 649         }
 650         String[] alleles = alls.split(",");
 651
 652         /*
 653          * get this peptides codon positions e.g. [3, 4, 5] or [4, 7, 10]
 654          */
 655         int[] codon = peptidePosition == lastPeptidePostion ? lastCodon
 656                 : MappingUtils.flattenRanges(dnaToProtein.locateInFrom(
 657                         peptidePosition, peptidePosition));
 658         lastPeptidePostion = peptidePosition;
 659         lastCodon = codon;
 660
 661         /*
 662          * save nucleotide (and this variant) for each codon position
 663          */
 664         for (int codonPos = 0; codonPos < 3; codonPos++)
 665         {
 666           String nucleotide = String.valueOf(dnaSeq
 667                   .getCharAt(codon[codonPos] - 1));
 668           if (codon[codonPos] == dnaCol)
 669           {
 670             /*
 671              * record current dna base and its alleles
 672              */
 673             String[] dnaVariants = new String[alleles.length + 1];
 674             dnaVariants[0] = nucleotide;
 675             System.arraycopy(alleles, 0, dnaVariants, 1, alleles.length);
 676             codonVariants[codonPos] = dnaVariants;
 677           }
 678           else if (codonVariants[codonPos] == null)
 679           {
 680             /*
 681              * record current dna base only
 682              * (at least until we find any variation and overwrite it)
 683              */
 684             codonVariants[codonPos] = new String[] { nucleotide };
 685           }
 686         }
 687       }
 688     }
 689
 690     /*
 691      * scan codon variations, compute peptide variants and add to peptide sequence
 692      */
 693     int count = 0;
 694     for (Entry<Integer, String[][]> variant : variants.entrySet())
 695     {
 696       int peptidePos = variant.getKey();
 697       String[][] codonVariants = variant.getValue();
 698       String residue = String.valueOf(peptide.getCharAt(peptidePos - 1)); // 0-based
 699       List<String> peptideVariants = computePeptideVariants(codonVariants,
 700               residue);
 701       if (!peptideVariants.isEmpty())
 702       {
 703         Collections.sort(peptideVariants);
 704         String desc = StringUtils.listToDelimitedString(peptideVariants,
 705                 ", ");
 706         SequenceFeature sf = new SequenceFeature(
 707                 SequenceOntology.SEQUENCE_VARIANT, desc, peptidePos,
 708                 peptidePos, Float.NaN, null);
 709         peptide.getDatasetSequence().addSequenceFeature(sf);
 710         count++;
 711       }
 712     }
 713     return count;
 714   }
 715
 716   /**
 717    * Returns a non-redundant list of all peptide translations generated by the
 718    * given dna variants, excluding the current residue value
 719    *
 720    * @param codonVariants
 721    *          an array of base values for codon positions 1, 2, 3
 722    * @param residue
 723    *          the current residue translation
 724    * @return
 725    */
 726   protected static List<String> computePeptideVariants(
 727           String[][] codonVariants, String residue)
 728   {
 729     List<String> result = new ArrayList<String>();
 730     for (String base1 : codonVariants[0])
 731     {
 732       for (String base2 : codonVariants[1])
 733       {
 734         for (String base3 : codonVariants[2])
 735         {
 736           String codon = base1 + base2 + base3;
 737           // TODO: report frameshift/insertion/deletion
 738           // and multiple-base variants?!
 739           String peptide = codon.contains("-") ? "-" : ResidueProperties
 740                   .codonTranslate(codon);
 741           if (peptide != null && !result.contains(peptide)
 742                   && !peptide.equals(residue))
 743           {
 744             result.add(peptide);
 745           }
 746         }
 747       }
 748     }
 749     return result;
 750   }
 751
 752   /**
 753    * Computes a list of all peptide variants given dna variants
 754    *
 755    * @param dnaSeq
 756    *          the coding dna sequence
 757    * @param codonVariants
 758    *          variant features for each codon position (null if no variant)
 759    * @param residue
 760    *          the canonical protein translation
 761    * @return
 762    */
 763   protected static List<String> computePeptideVariants(SequenceI dnaSeq,
 764           SequenceFeature[] codonVariants, String residue)
 765   {
 766     List<String> result = new ArrayList<String>();
 767     int[][] dnaVariants = new int[3][];
 768     for (int i = 0; i < 3; i++)
 769     {
 770
 771     }
 772     // TODO Auto-generated method stub
 773     return null;
 774   }
 775
 776   /**
 777    * precalculate different products that can be found for seqs in dataset and
 778    * return them.
 779    *
 780    * @param dna
 781    * @param seqs
 782    * @param dataset
 783    * @param fake
 784    *          - don't actually build lists - just get types
 785    * @return public static Object[] buildXProductsList(boolean dna, SequenceI[]
 786    *         seqs, AlignmentI dataset, boolean fake) { String types[] =
 787    *         jalview.analysis.CrossRef.findSequenceXrefTypes( dna, seqs,
 788    *         dataset); if (types != null) { System.out.println("Xref Types for:
 789    *         "+(dna ? "dna" : "prot")); for (int t = 0; t < types.length; t++) {
 790    *         System.out.println("Type: " + types[t]); SequenceI[] prod =
 791    *         jalview.analysis.CrossRef.findXrefSequences(seqs, dna, types[t]);
 792    *         System.out.println("Found " + ((prod == null) ? "no" : "" +
 793    *         prod.length) + " products"); if (prod!=null) { for (int p=0;
 794    *         p<prod.length; p++) { System.out.println("Prod "+p+":
 795    *         "+prod[p].getDisplayId(true)); } } } } else {
 796    *         System.out.println("Trying getProducts for
 797    *         "+al.getSequenceAt(0).getDisplayId(true));
 798    *         System.out.println("Search DS Xref for: "+(dna ? "dna" : "prot"));
 799    *         // have a bash at finding the products amongst all the retrieved
 800    *         sequences. SequenceI[] prod =
 801    *         jalview.analysis.CrossRef.findXrefSequences(al
 802    *         .getSequencesArray(), dna, null, ds); System.out.println("Found " +
 803    *         ((prod == null) ? "no" : "" + prod.length) + " products"); if
 804    *         (prod!=null) { // select non-equivalent sequences from dataset list
 805    *         for (int p=0; p<prod.length; p++) { System.out.println("Prod "+p+":
 806    *         "+prod[p].getDisplayId(true)); } } } }
 807    */
 808 }