src/jalview/datamodel/AlignedCodonFrame.java

   1 /*
   2  * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
   3  * Copyright (C) 2014 The Jalview Authors
   4  *
   5  * This file is part of Jalview.
   6  *
   7  * Jalview is free software: you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License
   9  * as published by the Free Software Foundation, either version 3
  10  * of the License, or (at your option) any later version.
  11  *
  12  * Jalview is distributed in the hope that it will be useful, but
  13  * WITHOUT ANY WARRANTY; without even the implied warranty
  14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE.  See the GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
  19  * The Jalview Authors are detailed in the 'AUTHORS' file.
  20  */
  21 package jalview.datamodel;
  22
  23 import jalview.util.MapList;
  24 import jalview.util.MappingUtils;
  25
  26 /**
  27  * Stores mapping between the columns of a protein alignment and a DNA alignment
  28  * and a list of individual codon to amino acid mappings between sequences.
  29  */
  30 public class AlignedCodonFrame
  31 {
  32
  33   /*
  34    * tied array of na Sequence objects.
  35    */
  36   private SequenceI[] dnaSeqs = null;
  37
  38   /*
  39    * tied array of Mappings to protein sequence Objects and SequenceI[]
  40    * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs
  41    * element to corresponding aaSeqs element
  42    */
  43   private Mapping[] dnaToProt = null;
  44
  45   /**
  46    * Constructor
  47    */
  48   public AlignedCodonFrame()
  49   {
  50   }
  51
  52   /**
  53    * Adds a mapping between the dataset sequences for the associated dna and
  54    * protein sequence objects
  55    *
  56    * @param dnaseq
  57    * @param aaseq
  58    * @param map
  59    */
  60   public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map)
  61   {
  62     int nlen = 1;
  63     if (dnaSeqs != null)
  64     {
  65       nlen = dnaSeqs.length + 1;
  66     }
  67     SequenceI[] ndna = new SequenceI[nlen];
  68     Mapping[] ndtp = new Mapping[nlen];
  69     if (dnaSeqs != null)
  70     {
  71       System.arraycopy(dnaSeqs, 0, ndna, 0, dnaSeqs.length);
  72       System.arraycopy(dnaToProt, 0, ndtp, 0, dnaSeqs.length);
  73     }
  74     dnaSeqs = ndna;
  75     dnaToProt = ndtp;
  76     nlen--;
  77     dnaSeqs[nlen] = (dnaseq.getDatasetSequence() == null) ? dnaseq : dnaseq
  78             .getDatasetSequence();
  79     Mapping mp = new Mapping(map);
  80     // JBPNote DEBUG! THIS !
  81     // dnaseq.transferAnnotation(aaseq, mp);
  82     // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse()));
  83     mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq
  84             .getDatasetSequence();
  85     dnaToProt[nlen] = mp;
  86   }
  87
  88   public SequenceI[] getdnaSeqs()
  89   {
  90     return dnaSeqs;
  91   }
  92
  93   public SequenceI[] getAaSeqs()
  94   {
  95     if (dnaToProt == null)
  96     {
  97       return null;
  98     }
  99     SequenceI[] sqs = new SequenceI[dnaToProt.length];
 100     for (int sz = 0; sz < dnaToProt.length; sz++)
 101     {
 102       sqs[sz] = dnaToProt[sz].to;
 103     }
 104     return sqs;
 105   }
 106
 107   public MapList[] getdnaToProt()
 108   {
 109     if (dnaToProt == null)
 110     {
 111       return null;
 112     }
 113     MapList[] sqs = new MapList[dnaToProt.length];
 114     for (int sz = 0; sz < dnaToProt.length; sz++)
 115     {
 116       sqs[sz] = dnaToProt[sz].map;
 117     }
 118     return sqs;
 119   }
 120
 121   public Mapping[] getProtMappings()
 122   {
 123     return dnaToProt;
 124   }
 125
 126   /**
 127    * Returns the first mapping found which is to or from the given sequence, or
 128    * null.
 129    *
 130    * @param seq
 131    * @return
 132    */
 133   public Mapping getMappingForSequence(SequenceI seq)
 134   {
 135     if (dnaSeqs == null)
 136     {
 137       return null;
 138     }
 139     SequenceI seqDs = seq.getDatasetSequence();
 140     seqDs = seqDs != null ? seqDs : seq;
 141
 142     for (int ds = 0; ds < dnaSeqs.length; ds++)
 143     {
 144       if (dnaSeqs[ds] == seqDs || dnaToProt[ds].to == seqDs)
 145       {
 146         return dnaToProt[ds];
 147       }
 148     }
 149     return null;
 150   }
 151
 152   /**
 153    * Return the corresponding aligned or dataset aa sequence for given dna
 154    * sequence, null if not found.
 155    *
 156    * @param sequenceRef
 157    * @return
 158    */
 159   public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef)
 160   {
 161     if (dnaSeqs == null)
 162     {
 163       return null;
 164     }
 165     SequenceI dnads = dnaSeqRef.getDatasetSequence();
 166     for (int ds = 0; ds < dnaSeqs.length; ds++)
 167     {
 168       if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads)
 169       {
 170         return dnaToProt[ds].to;
 171       }
 172     }
 173     return null;
 174   }
 175
 176   /**
 177    *
 178    * @param sequenceRef
 179    * @return null or corresponding aaSeq entry for dnaSeq entry
 180    */
 181   public SequenceI getDnaForAaSeq(SequenceI aaSeqRef)
 182   {
 183     if (dnaToProt == null)
 184     {
 185       return null;
 186     }
 187     SequenceI aads = aaSeqRef.getDatasetSequence();
 188     for (int as = 0; as < dnaToProt.length; as++)
 189     {
 190       if (dnaToProt[as].to == aaSeqRef || dnaToProt[as].to == aads)
 191       {
 192         return dnaSeqs[as];
 193       }
 194     }
 195     return null;
 196   }
 197
 198   /**
 199    * test to see if codon frame involves seq in any way
 200    *
 201    * @param seq
 202    *          a nucleotide or protein sequence
 203    * @return true if a mapping exists to or from this sequence to any translated
 204    *         sequence
 205    */
 206   public boolean involvesSequence(SequenceI seq)
 207   {
 208     return getAaForDnaSeq(seq) != null || getDnaForAaSeq(seq) != null;
 209   }
 210
 211   /**
 212    * Add search results for regions in other sequences that translate or are
 213    * translated from a particular position in seq
 214    *
 215    * @param seq
 216    * @param index
 217    *          position in seq
 218    * @param results
 219    *          where highlighted regions go
 220    */
 221   public void markMappedRegion(SequenceI seq, int index,
 222           SearchResults results)
 223   {
 224     if (dnaToProt == null)
 225     {
 226       return;
 227     }
 228     int[] codon;
 229     SequenceI ds = seq.getDatasetSequence();
 230     for (int mi = 0; mi < dnaToProt.length; mi++)
 231     {
 232       if (dnaSeqs[mi] == seq || dnaSeqs[mi] == ds)
 233       {
 234         // DEBUG System.err.println("dna pos "+index);
 235         codon = dnaToProt[mi].map.locateInTo(index, index);
 236         if (codon != null)
 237         {
 238           for (int i = 0; i < codon.length; i += 2)
 239           {
 240             results.addResult(dnaToProt[mi].to, codon[i], codon[i + 1]);
 241           }
 242         }
 243       }
 244       else if (dnaToProt[mi].to == seq || dnaToProt[mi].to == ds)
 245       {
 246         // DEBUG System.err.println("aa pos "+index);
 247         {
 248           codon = dnaToProt[mi].map.locateInFrom(index, index);
 249           if (codon != null)
 250           {
 251             for (int i = 0; i < codon.length; i += 2)
 252             {
 253               results.addResult(dnaSeqs[mi], codon[i], codon[i + 1]);
 254             }
 255           }
 256         }
 257       }
 258     }
 259   }
 260
 261   /**
 262    * Returns the DNA codon positions (base 1) for the given position (base 1) in
 263    * a mapped protein sequence, or null if no mapping is found.
 264    *
 265    * Intended for use in aligning cDNA to match aligned protein. Only the first
 266    * mapping found is returned, so not suitable for use if multiple protein
 267    * sequences are mapped to the same cDNA (but aligning cDNA as protein is
 268    * ill-defined for this case anyway).
 269    *
 270    * @param seq
 271    *          the DNA dataset sequence
 272    * @param aaPos
 273    *          residue position (base 1) in a protein sequence
 274    * @return
 275    */
 276   public int[] getDnaPosition(SequenceI seq, int aaPos)
 277   {
 278     /*
 279      * Adapted from markMappedRegion().
 280      */
 281     MapList ml = null;
 282     for (int i = 0; i < dnaToProt.length; i++)
 283     {
 284       if (dnaSeqs[i] == seq)
 285       {
 286         ml = getdnaToProt()[i];
 287         break;
 288       }
 289     }
 290     return ml == null ? null : ml.locateInFrom(aaPos, aaPos);
 291   }
 292
 293   /**
 294    * Convenience method to return the first aligned sequence in the given
 295    * alignment whose dataset has a mapping with the given dataset sequence.
 296    *
 297    * @param seq
 298    *
 299    * @param al
 300    * @return
 301    */
 302   public SequenceI findAlignedSequence(SequenceI seq, AlignmentI al)
 303   {
 304     /*
 305      * Search mapped protein ('to') sequences first.
 306      */
 307     if (this.dnaToProt != null)
 308     {
 309       for (int i = 0; i < dnaToProt.length; i++)
 310       {
 311         if (this.dnaSeqs[i] == seq)
 312         {
 313           for (SequenceI sourceAligned : al.getSequences())
 314           {
 315             if (this.dnaToProt[i].to == sourceAligned.getDatasetSequence())
 316             {
 317               return sourceAligned;
 318             }
 319           }
 320         }
 321       }
 322     }
 323
 324     /*
 325      * Then try mapped dna sequences.
 326      */
 327     if (this.dnaToProt != null)
 328     {
 329       for (int i = 0; i < dnaToProt.length; i++)
 330       {
 331         if (this.dnaToProt[i].to == seq)
 332         {
 333           for (SequenceI sourceAligned : al.getSequences())
 334           {
 335             if (this.dnaSeqs[i] == sourceAligned.getDatasetSequence())
 336             {
 337               return sourceAligned;
 338             }
 339           }
 340         }
 341       }
 342     }
 343
 344     return null;
 345   }
 346
 347   /**
 348    * Returns the region in the 'mappedFrom' sequence's dataset that is mapped to
 349    * position 'pos' (base 1) in the 'mappedTo' sequence's dataset. The region is
 350    * a set of start/end position pairs.
 351    *
 352    * @param mappedFrom
 353    * @param mappedTo
 354    * @param pos
 355    * @return
 356    */
 357   public int[] getMappedRegion(SequenceI mappedFrom, SequenceI mappedTo,
 358           int pos)
 359   {
 360     SequenceI targetDs = mappedFrom.getDatasetSequence() == null ? mappedFrom
 361             : mappedFrom.getDatasetSequence();
 362     SequenceI sourceDs = mappedTo.getDatasetSequence() == null ? mappedTo
 363             : mappedTo.getDatasetSequence();
 364     if (targetDs == null || sourceDs == null || dnaToProt == null)
 365     {
 366       return null;
 367     }
 368     for (int mi = 0; mi < dnaToProt.length; mi++)
 369     {
 370       if (dnaSeqs[mi] == targetDs && dnaToProt[mi].to == sourceDs)
 371       {
 372         int[] codon = dnaToProt[mi].map.locateInFrom(pos, pos);
 373         if (codon != null) {
 374           return codon;
 375         }
 376       }
 377     }
 378     return null;
 379   }
 380
 381   /**
 382    * Returns the DNA codon for the given position (base 1) in a mapped protein
 383    * sequence, or null if no mapping is found.
 384    *
 385    * @param protein
 386    *          the peptide dataset sequence
 387    * @param aaPos
 388    *          residue position (base 1) in the peptide sequence
 389    * @return
 390    */
 391   public char[] getMappedCodon(SequenceI protein, int aaPos)
 392   {
 393     if (dnaToProt == null)
 394     {
 395       return null;
 396     }
 397     MapList ml = null;
 398     char[] dnaSeq = null;
 399     for (int i = 0; i < dnaToProt.length; i++)
 400     {
 401       if (dnaToProt[i].to == protein)
 402       {
 403         ml = getdnaToProt()[i];
 404         dnaSeq = dnaSeqs[i].getSequence();
 405         break;
 406       }
 407     }
 408     if (ml == null)
 409     {
 410       return null;
 411     }
 412     int[] codonPos = ml.locateInFrom(aaPos, aaPos);
 413     if (codonPos == null)
 414     {
 415       return null;
 416     }
 417
 418     /*
 419      * Read off the mapped nucleotides (converting to position base 0)
 420      */
 421     codonPos = MappingUtils.flattenRanges(codonPos);
 422     return new char[]
 423     { dnaSeq[codonPos[0] - 1], dnaSeq[codonPos[1] - 1],
 424         dnaSeq[codonPos[2] - 1] };
 425   }
 426 }