JAL-3990 gradle spotlessApply

[jalview.git] / src / jalview / datamodel / SequenceI.java
diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java

index f1cba43..2f365e6 100755 (executable)
--- a/src/jalview/datamodel/SequenceI.java
+++ b/src/jalview/datamodel/SequenceI.java
@@ -20,8 +20,13 @@
   */
  package jalview.datamodel;
  
-import jalview.api.DBRefEntryI;
+import jalview.datamodel.Sequence.DBModList;
+import jalview.datamodel.features.SequenceFeaturesI;
+import jalview.util.MapList;
+import jalview.ws.params.InvalidArgumentException;
  
+import java.util.BitSet;
+import java.util.Iterator;
  import java.util.List;
  import java.util.Vector;
  
@@ -109,18 +114,20 @@ public interface SequenceI extends ASequenceI
     * get a range on the sequence as a string
     * 
     * @param start
-   *          position relative to start of sequence including gaps (from 0)
+   *          (inclusive) position relative to start of sequence including gaps
+   *          (from 0)
     * @param end
-   *          position relative to start of sequence including gaps (from 0)
+   *          (exclusive) position relative to start of sequence including gaps
+   *          (from 0)
     * 
     * @return String containing all gap and symbols in specified range
     */
    public String getSequenceAsString(int start, int end);
  
    /**
-   * Get the sequence as a character array
+   * Answers a copy of the sequence as a character array
     * 
-   * @return seqeunce and any gaps
+   * @return
     */
    public char[] getSequence();
  
@@ -176,7 +183,7 @@ public interface SequenceI extends ASequenceI
    public String getDescription();
  
    /**
-   * Return the alignment column for a sequence position
+   * Return the alignment column (from 1..) for a sequence position
     * 
     * @param pos
     *          lying from start to end
@@ -191,16 +198,31 @@ public interface SequenceI extends ASequenceI
    public int findIndex(int pos);
  
    /**
-   * Returns the sequence position for an alignment position
+   * Returns the sequence position for an alignment (column) position. If at a
+   * gap, returns the position of the next residue to the right. If beyond the
+   * end of the sequence, returns 1 more than the last residue position.
     * 
     * @param i
     *          column index in alignment (from 0..<length)
     * 
-   * @return residue number for residue (left of and) nearest ith column
+   * @return
     */
    public int findPosition(int i);
  
    /**
+   * Returns the sequence positions for first and last residues lying within the
+   * given column positions [fromColum,toColumn] (where columns are numbered
+   * from 1), or null if no residues are included in the range
+   * 
+   * @param fromColum
+   *          - first column base 1
+   * @param toColumn
+   *          - last column, base 1
+   * @return the first and last residue positions in range, or null if none
+   */
+  public ContiguousI findPositions(int fromColum, int toColumn);
+
+  /**
     * Returns an int array where indices correspond to each residue in the
     * sequence and the element value gives its position in the alignment
     * 
@@ -210,6 +232,13 @@ public interface SequenceI extends ASequenceI
    public int[] gapMap();
  
    /**
+   * Build a bitset corresponding to sequence gaps
+   * 
+   * @return a BitSet where set values correspond to gaps in the sequence
+   */
+  public BitSet gapBitset();
+
+  /**
     * Returns an int array where indices correspond to each position in sequence
     * char array and the element value gives the result of findPosition for that
     * index in the sequence.
@@ -219,6 +248,15 @@ public interface SequenceI extends ASequenceI
    public int[] findPositionMap();
  
    /**
+   * Answers true if the sequence is composed of amino acid characters. Note
+   * that implementations may use heuristic methods which are not guaranteed to
+   * give the biologically 'right' answer.
+   * 
+   * @return
+   */
+  public boolean isProtein();
+
+  /**
     * Delete a range of aligned sequence columns, creating a new dataset sequence
     * if necessary and adjusting start and end positions accordingly.
     * 
@@ -233,36 +271,47 @@ public interface SequenceI extends ASequenceI
     * DOCUMENT ME!
     * 
     * @param i
-   *          DOCUMENT ME!
+   *          alignment column number
     * @param c
-   *          DOCUMENT ME!
+   *          character to insert
     */
    public void insertCharAt(int i, char c);
  
    /**
-   * DOCUMENT ME!
+   * insert given character at alignment column position
     * 
     * @param position
-   *          DOCUMENT ME!
+   *          alignment column number
+   * @param count
+   *          length of insert
     * @param ch
-   *          DOCUMENT ME!
+   *          character to insert
     */
    public void insertCharAt(int position, int count, char ch);
  
    /**
-   * DOCUMENT ME!
+   * Answers a list of all sequence features associated with this sequence. The
+   * list may be held by the sequence's dataset sequence if that is defined.
     * 
-   * @return DOCUMENT ME!
+   * @return
     */
-  public SequenceFeature[] getSequenceFeatures();
+  public List<SequenceFeature> getSequenceFeatures();
  
    /**
-   * DOCUMENT ME!
+   * Answers the object holding features for the sequence
     * 
-   * @param v
-   *          DOCUMENT ME!
+   * @return
     */
-  public void setSequenceFeatures(SequenceFeature[] features);
+  SequenceFeaturesI getFeatures();
+
+  /**
+   * Replaces the sequence features associated with this sequence with the given
+   * features. If this sequence has a dataset sequence, then this method will
+   * update the dataset sequence's features instead.
+   * 
+   * @param features
+   */
+  public void setSequenceFeatures(List<SequenceFeature> features);
  
    /**
     * DOCUMENT ME!
@@ -280,11 +329,18 @@ public interface SequenceI extends ASequenceI
    public Vector<PDBEntry> getAllPDBEntries();
  
    /**
-   * add entry to the vector of PDBIds, if it isn't in the list already
+   * Adds the entry to the *normalised* list of PDBIds.
+   * 
+   * If a PDBEntry is passed with the same entry.getID() string as one already
+   * in the list, or one is added that appears to be the same but has a chain ID
+   * appended, then the existing PDBEntry will be updated with the new
+   * attributes instead, unless the entries have distinct chain codes or
+   * associated structure files.
     * 
     * @param entry
+   * @return true if the entry was added, false if updated
     */
-  public void addPDBId(PDBEntry entry);
+  public boolean addPDBId(PDBEntry entry);
  
    /**
     * update the list of PDBEntrys to include any DBRefEntrys citing structural
@@ -298,9 +354,21 @@ public interface SequenceI extends ASequenceI
  
    public void setVamsasId(String id);
  
-  public void setDBRefs(DBRefEntry[] dbs);
+  /**
+   * Sets the list of database references for the sequence.
+   * 
+   * BH 2019.02.04 changes param to DBModList
+   * 
+   * @param dbs
+   * @deprecated - use is discouraged since side-effects may occur if DBRefEntry
+   *             set are not normalised.
+   * @throws InvalidArgumentException
+   *           if the list is not one created by Sequence itself
+   */
+  @Deprecated
+  public void setDBRefs(DBModList<DBRefEntry> dbs);
  
-  public DBRefEntry[] getDBRefs();
+  public DBModList<DBRefEntry> getDBRefs();
  
    /**
     * add the given entry to the list of DBRefs for this sequence, or replace a
@@ -310,7 +378,14 @@ public interface SequenceI extends ASequenceI
     */
    public void addDBRef(DBRefEntry entry);
  
-  public void addSequenceFeature(SequenceFeature sf);
+  /**
+   * Adds the given sequence feature and returns true, or returns false if it is
+   * already present on the sequence, or if the feature type is null.
+   * 
+   * @param sf
+   * @return true if the feature was added, otherwise false
+   */
+  public boolean addSequenceFeature(SequenceFeature sf);
  
    public void deleteFeature(SequenceFeature sf);
  
@@ -372,6 +447,18 @@ public interface SequenceI extends ASequenceI
            String label);
  
    /**
+   * Returns a (possibly empty) list of any annotations that match on given
+   * calcId (source), label (type) and description (observation instance). Null
+   * values do not match.
+   * 
+   * @param calcId
+   * @param label
+   * @param description
+   */
+  public List<AlignmentAnnotation> getAlignmentAnnotations(String calcId,
+          String label, String description);
+
+  /**
     * create a new dataset sequence (if necessary) for this sequence and sets
     * this sequence to refer to it. This call will move any features or
     * references on the sequence onto the dataset. It will also make a duplicate
@@ -395,17 +482,6 @@ public interface SequenceI extends ASequenceI
    public void transferAnnotation(SequenceI entry, Mapping mp);
  
    /**
-   * @param index
-   *          The sequence index in the MSA
-   */
-  public void setIndex(int index);
-
-  /**
-   * @return The index of the sequence in the alignment
-   */
-  public int getIndex();
-
-  /**
     * @return The RNA of the sequence in the alignment
     */
  
@@ -432,7 +508,93 @@ public interface SequenceI extends ASequenceI
     */
    public PDBEntry getPDBEntry(String pdbId);
  
-  public void setSourceDBRef(DBRefEntryI dbRef);
+  /**
+   * Get all primary database/accessions for this sequence's data. These
+   * DBRefEntry are expected to resolve to a valid record in the associated
+   * external database, either directly or via a provided 1:1 Mapping.
+   * 
+   * @return just the primary references (if any) for this sequence, or an empty
+   *         list
+   */
+  public List<DBRefEntry> getPrimaryDBRefs();
+
+  /**
+   * Returns a (possibly empty) list of sequence features that overlap the given
+   * alignment column range, optionally restricted to one or more specified
+   * feature types. If the range is all gaps, then features which enclose it are
+   * included (but not contact features).
+   * 
+   * @param fromCol
+   *          start column of range inclusive (1..)
+   * @param toCol
+   *          end column of range inclusive (1..)
+   * @param types
+   *          optional feature types to restrict results to
+   * @return
+   */
+  List<SequenceFeature> findFeatures(int fromCol, int toCol,
+          String... types);
+
+  /**
+   * Method to call to indicate that the sequence (characters or alignment/gaps)
+   * has been modified. Provided to allow any cursors on residue/column
+   * positions to be invalidated.
+   */
+  void sequenceChanged();
+
+  /**
+   * 
+   * @return BitSet corresponding to index [0,length) where Comparison.isGap()
+   *         returns true.
+   */
+  BitSet getInsertionsAsBits();
+
+  /**
+   * Replaces every occurrence of c1 in the sequence with c2 and returns the
+   * number of characters changed
+   * 
+   * @param c1
+   * @param c2
+   */
+  public int replace(char c1, char c2);
+
+  /**
+   * Answers the GeneLociI, or null if not known
+   * 
+   * @return
+   */
+  GeneLociI getGeneLoci();
+
+  /**
+   * Sets the mapping to gene loci for the sequence
+   * 
+   * @param speciesId
+   * @param assemblyId
+   * @param chromosomeId
+   * @param map
+   */
+  void setGeneLoci(String speciesId, String assemblyId, String chromosomeId,
+          MapList map);
+
+  /**
+   * Returns the sequence string constructed from the substrings of a sequence
+   * defined by the int[] ranges provided by an iterator. E.g. the iterator
+   * could iterate over all visible regions of the alignment
+   * 
+   * @param it
+   *          the iterator to use
+   * @return a String corresponding to the sequence
+   */
+  public String getSequenceStringFromIterator(Iterator<int[]> it);
+
+  /**
+   * Locate the first position in this sequence which is not contained in an
+   * iterator region. If no such position exists, return 0
+   * 
+   * @param it
+   *          iterator over regions
+   * @return first residue not contained in regions
+   */
+  public int firstResidueOutsideIterator(Iterator<int[]> it);
  
-  public DBRefEntryI getSourceDBRef();
  }