src/jalview/datamodel/AlignedCodonFrame.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
   3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.datamodel;
  22
  23 import java.util.ArrayList;
  24 import java.util.List;
  25
  26 import jalview.util.MapList;
  27 import jalview.util.MappingUtils;
  28
  29 /**
  30  * Stores mapping between the columns of a protein alignment and a DNA alignment
  31  * and a list of individual codon to amino acid mappings between sequences.
  32  */
  33 public class AlignedCodonFrame
  34 {
  35
  36   /**
  37    * tied array of na Sequence objects.
  38    */
  39   private SequenceI[] dnaSeqs = null;
  40
  41   /**
  42    * tied array of Mappings to protein sequence Objects and SequenceI[]
  43    * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs
  44    * element to corresponding aaSeqs element
  45    */
  46   private Mapping[] dnaToProt = null;
  47
  48   /**
  49    * Constructor
  50    */
  51   public AlignedCodonFrame()
  52   {
  53   }
  54
  55   /**
  56    * Adds a mapping between the dataset sequences for the associated dna and
  57    * protein sequence objects
  58    *
  59    * @param dnaseq
  60    * @param aaseq
  61    * @param map
  62    */
  63   public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map)
  64   {
  65     int nlen = 1;
  66     if (dnaSeqs != null)
  67     {
  68       nlen = dnaSeqs.length + 1;
  69     }
  70     SequenceI[] ndna = new SequenceI[nlen];
  71     Mapping[] ndtp = new Mapping[nlen];
  72     if (dnaSeqs != null)
  73     {
  74       System.arraycopy(dnaSeqs, 0, ndna, 0, dnaSeqs.length);
  75       System.arraycopy(dnaToProt, 0, ndtp, 0, dnaSeqs.length);
  76     }
  77     dnaSeqs = ndna;
  78     dnaToProt = ndtp;
  79     nlen--;
  80     dnaSeqs[nlen] = (dnaseq.getDatasetSequence() == null) ? dnaseq : dnaseq
  81             .getDatasetSequence();
  82     Mapping mp = new Mapping(map);
  83     // JBPNote DEBUG! THIS !
  84     // dnaseq.transferAnnotation(aaseq, mp);
  85     // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse()));
  86     mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq
  87             .getDatasetSequence();
  88     dnaToProt[nlen] = mp;
  89   }
  90
  91   public SequenceI[] getdnaSeqs()
  92   {
  93     return dnaSeqs;
  94   }
  95
  96   public SequenceI[] getAaSeqs()
  97   {
  98     if (dnaToProt == null)
  99     {
 100       return null;
 101     }
 102     SequenceI[] sqs = new SequenceI[dnaToProt.length];
 103     for (int sz = 0; sz < dnaToProt.length; sz++)
 104     {
 105       sqs[sz] = dnaToProt[sz].to;
 106     }
 107     return sqs;
 108   }
 109
 110   public MapList[] getdnaToProt()
 111   {
 112     if (dnaToProt == null)
 113     {
 114       return null;
 115     }
 116     MapList[] sqs = new MapList[dnaToProt.length];
 117     for (int sz = 0; sz < dnaToProt.length; sz++)
 118     {
 119       sqs[sz] = dnaToProt[sz].map;
 120     }
 121     return sqs;
 122   }
 123
 124   public Mapping[] getProtMappings()
 125   {
 126     return dnaToProt;
 127   }
 128
 129   /**
 130    * Returns the first mapping found which is to or from the given sequence, or
 131    * null.
 132    *
 133    * @param seq
 134    * @return
 135    */
 136   public Mapping getMappingForSequence(SequenceI seq)
 137   {
 138     if (dnaSeqs == null)
 139     {
 140       return null;
 141     }
 142     SequenceI seqDs = seq.getDatasetSequence();
 143     seqDs = seqDs != null ? seqDs : seq;
 144
 145     for (int ds = 0; ds < dnaSeqs.length; ds++)
 146     {
 147       if (dnaSeqs[ds] == seqDs || dnaToProt[ds].to == seqDs)
 148       {
 149         return dnaToProt[ds];
 150       }
 151     }
 152     return null;
 153   }
 154
 155   /**
 156    * Return the corresponding aligned or dataset aa sequence for given dna
 157    * sequence, null if not found.
 158    *
 159    * @param sequenceRef
 160    * @return
 161    */
 162   public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef)
 163   {
 164     if (dnaSeqs == null)
 165     {
 166       return null;
 167     }
 168     SequenceI dnads = dnaSeqRef.getDatasetSequence();
 169     for (int ds = 0; ds < dnaSeqs.length; ds++)
 170     {
 171       if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads)
 172       {
 173         return dnaToProt[ds].to;
 174       }
 175     }
 176     return null;
 177   }
 178
 179   /**
 180    *
 181    * @param sequenceRef
 182    * @return null or corresponding aaSeq entry for dnaSeq entry
 183    */
 184   public SequenceI getDnaForAaSeq(SequenceI aaSeqRef)
 185   {
 186     if (dnaToProt == null)
 187     {
 188       return null;
 189     }
 190     SequenceI aads = aaSeqRef.getDatasetSequence();
 191     for (int as = 0; as < dnaToProt.length; as++)
 192     {
 193       if (dnaToProt[as].to == aaSeqRef || dnaToProt[as].to == aads)
 194       {
 195         return dnaSeqs[as];
 196       }
 197     }
 198     return null;
 199   }
 200
 201   /**
 202    * test to see if codon frame involves seq in any way
 203    *
 204    * @param seq
 205    *          a nucleotide or protein sequence
 206    * @return true if a mapping exists to or from this sequence to any translated
 207    *         sequence
 208    */
 209   public boolean involvesSequence(SequenceI seq)
 210   {
 211     return getAaForDnaSeq(seq) != null || getDnaForAaSeq(seq) != null;
 212   }
 213
 214   /**
 215    * Add search results for regions in other sequences that translate or are
 216    * translated from a particular position in seq
 217    *
 218    * @param seq
 219    * @param index
 220    *          position in seq
 221    * @param results
 222    *          where highlighted regions go
 223    */
 224   public void markMappedRegion(SequenceI seq, int index,
 225           SearchResults results)
 226   {
 227     if (dnaToProt == null)
 228     {
 229       return;
 230     }
 231     int[] codon;
 232     SequenceI ds = seq.getDatasetSequence();
 233     for (int mi = 0; mi < dnaToProt.length; mi++)
 234     {
 235       if (dnaSeqs[mi] == seq || dnaSeqs[mi] == ds)
 236       {
 237         // DEBUG System.err.println("dna pos "+index);
 238         codon = dnaToProt[mi].map.locateInTo(index, index);
 239         if (codon != null)
 240         {
 241           for (int i = 0; i < codon.length; i += 2)
 242           {
 243             results.addResult(dnaToProt[mi].to, codon[i], codon[i + 1]);
 244           }
 245         }
 246       }
 247       else if (dnaToProt[mi].to == seq || dnaToProt[mi].to == ds)
 248       {
 249         // DEBUG System.err.println("aa pos "+index);
 250         {
 251           codon = dnaToProt[mi].map.locateInFrom(index, index);
 252           if (codon != null)
 253           {
 254             for (int i = 0; i < codon.length; i += 2)
 255             {
 256               results.addResult(dnaSeqs[mi], codon[i], codon[i + 1]);
 257             }
 258           }
 259         }
 260       }
 261     }
 262   }
 263
 264   /**
 265    * Returns the DNA codon positions (base 1) for the given position (base 1) in
 266    * a mapped protein sequence, or null if no mapping is found.
 267    *
 268    * Intended for use in aligning cDNA to match aligned protein. Only the first
 269    * mapping found is returned, so not suitable for use if multiple protein
 270    * sequences are mapped to the same cDNA (but aligning cDNA as protein is
 271    * ill-defined for this case anyway).
 272    *
 273    * @param seq
 274    *          the DNA dataset sequence
 275    * @param aaPos
 276    *          residue position (base 1) in a protein sequence
 277    * @return
 278    */
 279   public int[] getDnaPosition(SequenceI seq, int aaPos)
 280   {
 281     /*
 282      * Adapted from markMappedRegion().
 283      */
 284     MapList ml = null;
 285     for (int i = 0; i < dnaToProt.length; i++)
 286     {
 287       if (dnaSeqs[i] == seq)
 288       {
 289         ml = getdnaToProt()[i];
 290         break;
 291       }
 292     }
 293     return ml == null ? null : ml.locateInFrom(aaPos, aaPos);
 294   }
 295
 296   /**
 297    * Convenience method to return the first aligned sequence in the given
 298    * alignment whose dataset has a mapping with the given dataset sequence.
 299    *
 300    * @param seq
 301    *
 302    * @param al
 303    * @return
 304    */
 305   public SequenceI findAlignedSequence(SequenceI seq, AlignmentI al)
 306   {
 307     /*
 308      * Search mapped protein ('to') sequences first.
 309      */
 310     if (this.dnaToProt != null)
 311     {
 312       for (int i = 0; i < dnaToProt.length; i++)
 313       {
 314         if (this.dnaSeqs[i] == seq)
 315         {
 316           for (SequenceI sourceAligned : al.getSequences())
 317           {
 318             if (this.dnaToProt[i].to == sourceAligned.getDatasetSequence())
 319             {
 320               return sourceAligned;
 321             }
 322           }
 323         }
 324       }
 325     }
 326
 327     /*
 328      * Then try mapped dna sequences.
 329      */
 330     if (this.dnaToProt != null)
 331     {
 332       for (int i = 0; i < dnaToProt.length; i++)
 333       {
 334         if (this.dnaToProt[i].to == seq)
 335         {
 336           for (SequenceI sourceAligned : al.getSequences())
 337           {
 338             if (this.dnaSeqs[i] == sourceAligned.getDatasetSequence())
 339             {
 340               return sourceAligned;
 341             }
 342           }
 343         }
 344       }
 345     }
 346
 347     return null;
 348   }
 349
 350   /**
 351    * Returns the region in the 'mappedFrom' sequence's dataset that is mapped to
 352    * position 'pos' (base 1) in the 'mappedTo' sequence's dataset. The region is
 353    * a set of start/end position pairs.
 354    *
 355    * @param mappedFrom
 356    * @param mappedTo
 357    * @param pos
 358    * @return
 359    */
 360   public int[] getMappedRegion(SequenceI mappedFrom, SequenceI mappedTo,
 361           int pos)
 362   {
 363     SequenceI targetDs = mappedFrom.getDatasetSequence() == null ? mappedFrom
 364             : mappedFrom.getDatasetSequence();
 365     SequenceI sourceDs = mappedTo.getDatasetSequence() == null ? mappedTo
 366             : mappedTo.getDatasetSequence();
 367     if (targetDs == null || sourceDs == null || dnaToProt == null)
 368     {
 369       return null;
 370     }
 371     for (int mi = 0; mi < dnaToProt.length; mi++)
 372     {
 373       if (dnaSeqs[mi] == targetDs && dnaToProt[mi].to == sourceDs)
 374       {
 375         int[] codon = dnaToProt[mi].map.locateInFrom(pos, pos);
 376         if (codon != null) {
 377           return codon;
 378         }
 379       }
 380     }
 381     return null;
 382   }
 383
 384   /**
 385    * Returns the DNA codon for the given position (base 1) in a mapped protein
 386    * sequence, or null if no mapping is found.
 387    *
 388    * @param protein
 389    *          the peptide dataset sequence
 390    * @param aaPos
 391    *          residue position (base 1) in the peptide sequence
 392    * @return
 393    */
 394   public char[] getMappedCodon(SequenceI protein, int aaPos)
 395   {
 396     if (dnaToProt == null)
 397     {
 398       return null;
 399     }
 400     MapList ml = null;
 401     char[] dnaSeq = null;
 402     for (int i = 0; i < dnaToProt.length; i++)
 403     {
 404       if (dnaToProt[i].to == protein)
 405       {
 406         ml = getdnaToProt()[i];
 407         dnaSeq = dnaSeqs[i].getSequence();
 408         break;
 409       }
 410     }
 411     if (ml == null)
 412     {
 413       return null;
 414     }
 415     int[] codonPos = ml.locateInFrom(aaPos, aaPos);
 416     if (codonPos == null)
 417     {
 418       return null;
 419     }
 420
 421     /*
 422      * Read off the mapped nucleotides (converting to position base 0)
 423      */
 424     codonPos = MappingUtils.flattenRanges(codonPos);
 425     return new char[]
 426     { dnaSeq[codonPos[0] - 1], dnaSeq[codonPos[1] - 1],
 427         dnaSeq[codonPos[2] - 1] };
 428   }
 429
 430   /**
 431    * Returns any mappings found which are to (or from) the given sequence, and
 432    * to distinct sequences.
 433    *
 434    * @param seq
 435    * @return
 436    */
 437   public List<Mapping> getMappingsForSequence(SequenceI seq)
 438   {
 439     List<Mapping> result = new ArrayList<Mapping>();
 440     if (dnaSeqs == null)
 441     {
 442       return result;
 443     }
 444     List<SequenceI> related = new ArrayList<SequenceI>();
 445     SequenceI seqDs = seq.getDatasetSequence();
 446     seqDs = seqDs != null ? seqDs : seq;
 447
 448     for (int ds = 0; ds < dnaSeqs.length; ds++)
 449     {
 450       final Mapping mapping = dnaToProt[ds];
 451       if (dnaSeqs[ds] == seqDs || mapping.to == seqDs)
 452       {
 453         if (!related.contains(mapping.to))
 454         {
 455           result.add(mapping);
 456           related.add(mapping.to);
 457         }
 458       }
 459     }
 460     return result;
 461   }
 462 }