X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FSequenceI.java;h=82575ec89c7767223ab47024e7f765c3705a2c24;hb=81682dafca2e79cf141b134496db0c3a84027806;hp=38be37f1f72af433fc3605e8f80ce7c0d12d1996;hpb=88694463a2aea303694231603b61970f72a5a259;p=jalview.git diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index 38be37f..82575ec 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -20,9 +20,13 @@ */ package jalview.datamodel; +import jalview.datamodel.Sequence.DBModList; import jalview.datamodel.features.SequenceFeaturesI; +import jalview.util.MapList; +import jalview.ws.params.InvalidArgumentException; import java.util.BitSet; +import java.util.Iterator; import java.util.List; import java.util.Vector; @@ -44,6 +48,9 @@ public interface SequenceI extends ASequenceI */ public void setName(String name); + public HiddenMarkovModel getHMM(); + + public void setHMM(HiddenMarkovModel hmm); /** * Get the display name */ @@ -94,6 +101,14 @@ public interface SequenceI extends ASequenceI public int getLength(); /** + * Replace the sequence with the given characters + * + * @param sequence + * new sequence characters + */ + public void setSequence(char[] sequence); + + /** * Replace the sequence with the given string * * @param sequence @@ -110,18 +125,20 @@ public interface SequenceI extends ASequenceI * get a range on the sequence as a string * * @param start - * position relative to start of sequence including gaps (from 0) + * (inclusive) position relative to start of sequence including gaps + * (from 0) * @param end - * position relative to start of sequence including gaps (from 0) + * (exclusive) position relative to start of sequence including gaps + * (from 0) * * @return String containing all gap and symbols in specified range */ public String getSequenceAsString(int start, int end); /** - * Get the sequence as a character array + * Answers a copy 
of the sequence as a character array * - * @return seqeunce and any gaps + * @return */ public char[] getSequence(); @@ -192,39 +209,29 @@ public interface SequenceI extends ASequenceI public int findIndex(int pos); /** - * Returns the sequence position for an alignment position. + * Returns the sequence position for an alignment (column) position. If at a + * gap, returns the position of the next residue to the right. If beyond the + * end of the sequence, returns 1 more than the last residue position. * * @param i * column index in alignment (from 0.. - * Example: - * >Seq/8-13 - * ABC--DE-F - * findPositions(1, 4) returns Range(9, 9) // B only - * findPositions(3, 4) returns null // all gaps - * findPositions(2, 6) returns Range(10, 12) // CDE - * findPositions(3, 7) returns Range(11,12) // DE - * + * Returns the sequence positions for first and last residues lying within the + * given column positions [fromColum,toColumn] (where columns are numbered + * from 1), or null if no residues are included in the range * - * @param fromCol - * first aligned column position (base 0, inclusive) - * @param toCol - * last aligned column position (base 0, inclusive) - * - * @return + * @param fromColum + * - first column base 1. 
(0 and negative positions are rounded up) + * @param toColumn + * - last column, base 1 + * @return null if fromColum>toColumn */ - public Range findPositions(int fromCol, int toCol); + public ContiguousI findPositions(int fromColum, int toColumn); /** * Returns an int array where indices correspond to each residue in the @@ -236,6 +243,13 @@ public interface SequenceI extends ASequenceI public int[] gapMap(); /** + * Build a bitset corresponding to sequence gaps + * + * @return a BitSet where set values correspond to gaps in the sequence + */ + public BitSet gapBitset(); + + /** * Returns an int array where indices correspond to each position in sequence * char array and the element value gives the result of findPosition for that * index in the sequence. @@ -290,7 +304,7 @@ public interface SequenceI extends ASequenceI * Answers a list of all sequence features associated with this sequence. The * list may be held by the sequence's dataset sequence if that is defined. * - * @return hard reference to array + * @return */ public List getSequenceFeatures(); @@ -354,14 +368,17 @@ public interface SequenceI extends ASequenceI /** * set the array of Database references for the sequence. * + * BH 2019.02.04 changes param to DBModList + * * @param dbs * @deprecated - use is discouraged since side-effects may occur if DBRefEntry * set are not normalised. + * @throws InvalidArgumentException if the list is not one created by Sequence itself */ @Deprecated - public void setDBRefs(DBRefEntry[] dbs); + public void setDBRefs(DBModList dbs); - public DBRefEntry[] getDBRefs(); + public DBModList getDBRefs(); /** * add the given entry to the list of DBRefs for this sequence, or replace a @@ -387,6 +404,24 @@ public interface SequenceI extends ASequenceI public SequenceI getDatasetSequence(); /** + * Returns the top grandparent in the dataset sequences hierarchy + * or null if there is no dataset associated with this sequence. 
+ */ + public default SequenceI getRootDatasetSequence() + { + if (getDatasetSequence() == null) + { + return null; + } + var sequence = getDatasetSequence(); + while (sequence.getDatasetSequence() != null) + { + sequence = sequence.getDatasetSequence(); + } + return sequence; + } + + /** * Returns a new array containing this sequence's annotations, or null. */ public AlignmentAnnotation[] getAnnotation(); @@ -440,6 +475,17 @@ public interface SequenceI extends ASequenceI String label); /** + * Returns a (possibly empty) list of any annotations that match on given + * calcId (source), label (type) and description (observation instance). + * Null values do not match. + * + * @param calcId + * @param label + * @param description + */ + public List getAlignmentAnnotations(String calcId, + String label, String description); + /** * create a new dataset sequence (if necessary) for this sequence and sets * this sequence to refer to it. This call will move any features or * references on the sequence onto the dataset. 
It will also make a duplicate @@ -463,17 +509,6 @@ public interface SequenceI extends ASequenceI public void transferAnnotation(SequenceI entry, Mapping mp); /** - * @param index - * The sequence index in the MSA - */ - public void setIndex(int index); - - /** - * @return The index of the sequence in the alignment - */ - public int getIndex(); - - /** * @return The RNA of the sequence in the alignment */ @@ -509,18 +544,27 @@ public interface SequenceI extends ASequenceI * list */ public List getPrimaryDBRefs(); + /** + * Answers true if the sequence has annotation for Hidden Markov Model + * information content, else false + */ + boolean hasHMMAnnotation(); /** - * Returns a (possibly empty) list of sequence features that overlap the range - * from-to (inclusive), optionally restricted to one or more specified feature - * types + * Returns a (possibly empty) list of sequence features that overlap the given + * alignment column range, optionally restricted to one or more specified + * feature types. If the range is all gaps, then features which enclose it are + * included (but not contact features). * - * @param from - * @param to + * @param fromCol + * start column of range inclusive (1..) + * @param toCol + * end column of range inclusive (1..) * @param types + * optional feature types to restrict results to * @return */ - List findFeatures(int from, int to, String... types); + List findFeatures(int fromCol, int toCol, String... types); /** * Method to call to indicate that the sequence (characters or alignment/gaps) @@ -535,4 +579,62 @@ public interface SequenceI extends ASequenceI * returns true. 
*/ BitSet getInsertionsAsBits(); + + /** + * Replaces every occurrence of c1 in the sequence with c2 and returns the + * number of characters changed + * + * @param c1 + * @param c2 + */ + public int replace(char c1, char c2); + + /** + * Answers the GeneLociI, or null if not known + * + * @return + */ + GeneLociI getGeneLoci(); + + /** + * Sets the mapping to gene loci for the sequence + * + * @param speciesId + * @param assemblyId + * @param chromosomeId + * @param map + */ + void setGeneLoci(String speciesId, String assemblyId, + String chromosomeId, MapList map); + + + /** + * Returns the sequence string constructed from the substrings of a sequence + * defined by the int[] ranges provided by an iterator. E.g. the iterator + * could iterate over all visible regions of the alignment + * + * @param it + * the iterator to use + * @return a String corresponding to the sequence + */ + public String getSequenceStringFromIterator(Iterator it); + + /** + * Locate the first position in this sequence which is not contained in an + * iterator region. If no such position exists, return 0 + * + * @param it + * iterator over regions + * @return first residue not contained in regions + */ + public int firstResidueOutsideIterator(Iterator it); + + + /** + * Answers true if this sequence has an associated Hidden Markov Model + * + * @return + */ + boolean hasHMMProfile(); } +