X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignmentI.java;h=6d257a92834852358666e08b1e8955c01457fd04;hb=17e77c3f2949a0729322b4a8d907f3f34b6a9914;hp=416be69750e1e2b52652b27b4b9ff198cdb87dec;hpb=efc31b4a8d5cee63555586804a2b79c06bdb5a14;p=jalview.git diff --git a/src/jalview/datamodel/AlignmentI.java b/src/jalview/datamodel/AlignmentI.java index 416be69..6d257a9 100755 --- a/src/jalview/datamodel/AlignmentI.java +++ b/src/jalview/datamodel/AlignmentI.java @@ -1,292 +1,527 @@ -/* -* Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version 2 -* of the License, or (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ -package jalview.datamodel; - -import java.util.*; - - -/** Data structure to hold and manipulate a multiple sequence alignment - */ -public interface AlignmentI -{ - /** - * Calculates the number of sequences in an alignment - * - * @return Number of sequences in alignment - */ - public int getHeight(); - - /** - * Calculates the maximum width of the alignment, including gaps. - * - * @return Greatest sequence length within alignment. - */ - public int getWidth(); - - /** - * Calculates the longest sequence Id of the alignment - * - * @return Number of characters in longest sequence Id. 
- */ - public int getMaxIdLength(); - - /** - * Calculates if this set of sequences is all the same length - * - * @return true if all sequences in alignment are the same length - */ - public boolean isAligned(); - - /** - * Gets sequences as a Vector - * - * @return All sequences in alignment. - */ - public Vector getSequences(); - - /** - * Find a specific sequence in this alignment. - * - * @param i Index of required sequence. - * - * @return SequenceI at given index. - */ - public SequenceI getSequenceAt(int i); - - /** - * Add a new sequence to this alignment. - * - * @param seq New sequence will be added at end of alignment. - */ - public void addSequence(SequenceI seq); - - /** - * Used to set a particular index of the alignment with the given sequence. - * - * @param i Index of sequence to be updated. - * @param seq New sequence to be inserted. - */ - public void setSequenceAt(int i, SequenceI seq); - - /** - * Deletes a sequence from the alignment. - * - * @param s Sequence to be deleted. - */ - public void deleteSequence(SequenceI s); - - /** - * Deletes a sequence from the alignment. - * - * @param i Index of sequence to be deleted. - */ - public void deleteSequence(int i); - - /** - * Deletes all residues in every sequence of alignment within given columns. - * - * @param start Start index of columns to delete. - * @param end End index to columns to delete. - */ - public void deleteColumns(int start, int end); - - /** - * Deletes all residues in every sequence of alignment within given columns. - * - * @param seq1 Index of first sequence to delete columns from. - * @param seq2 Index of last sequence to delete columns from. - * @param start Start index of columns to delete. - * @param end End index of columns to delete. - */ - public void deleteColumns(int seq1, int seq2, int start, int end); - - /** - * Finds sequence in alignment using sequence name as query. - * - * @param name Id of sequence to search for. 
- * - * @return Sequence matching query, if found. If not found returns null. - */ - public SequenceI findName(String name); - - /** - * Finds sequence in alignment using full displayId as query. - * - * @param name displayId, ie NAME/25-100 - * - * @return Sequence matching query, if found. If not found returns null. - */ - public SequenceI findbyDisplayId(String name); - - /** - * Finds index of a given sequence in the alignment. - * - * @param s Sequence to look for. - * - * @return Index of sequence within the alignment. - */ - public int findIndex(SequenceI s); - - /** - * All sequences will be cut from beginning to given index. - * - * @param i Remove all residues in sequences up to this column. - */ - public void trimLeft(int i); - - /** - * All sequences will be cut from given index. - * - * @param i Remove all residues in sequences beyond this column. - */ - public void trimRight(int i); - - /** - * Removes all columns containing entirely gap characters. - */ - public void removeGaps(); - - /** - * Removes redundant sequences from alignment. - * - * @param threshold Remove all sequences above the given threshold. - * @param sel Set of sequences which will have redundant sequences removed from. - * - * @return All sequences below redundancy threshold. - */ - public Vector removeRedundancy(float threshold, Vector sel); - - /** - * Finds group that sequence at index i in alignment is part of. - * - * @param i Index in alignment. - * - * @return First group found for sequence at position i. WARNING : - * Sequences may be members of several groups. This method is incomplete. - */ - public SequenceGroup findGroup(int i); - - /** - * Finds group that given sequence is part of. - * - * @param s Sequence in alignment. - * - * @return First group found for sequence. WARNING : - * Sequences may be members of several groups. This method is incomplete. - */ - public SequenceGroup findGroup(SequenceI s); - - /** - * Finds all groups that a given sequence is part of. 
- * - * @param s Sequence in alignment. - * - * @return All groups containing given sequence. - */ - public SequenceGroup[] findAllGroups(SequenceI s); - - /** - * Adds a new SequenceGroup to this alignment. - * - * @param sg New group to be added. - */ - public void addGroup(SequenceGroup sg); - - /** - * Deletes a specific SequenceGroup - * - * @param g Group will be deleted from alignment. - */ - public void deleteGroup(SequenceGroup g); - - /** - * Get all the groups associated with this alignment. - * - * @return All groups as a Vector. - */ - public Vector getGroups(); - - /** - * Deletes all groups from this alignment. - */ - public void deleteAllGroups(); - - /** - * Adds a super group. A SuperGroup is a group of groups. - * - * @param sg Adds a new SuperGroup to alignment - */ - public void addSuperGroup(SuperGroup sg); - - /** - * Removes SuperGroup from alignment. - * - * @param sg This SuperGroup will be deleted from alignment. - */ - public void removeSuperGroup(SuperGroup sg); - - /** - * Finds any SuperGroup that a given SequenceGroup may be part of. - * - * @param sg SequenceGroup to search for. - * - * @return SuperGroup that contains the given SequenceGroup. - */ - public SuperGroup getSuperGroup(SequenceGroup sg); - - /** - * Adds a new AlignmentAnnotation to this alignment - */ - public void addAnnotation(AlignmentAnnotation aa); - - /** - * Deletes a specific AlignmentAnnotation from the alignment. - * - * @param aa DOCUMENT ME! - */ - public void deleteAnnotation(AlignmentAnnotation aa); - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public AlignmentAnnotation[] getAlignmentAnnotation(); - - /** - * DOCUMENT ME! - * - * @param gc DOCUMENT ME! - */ - public void setGapCharacter(char gc); - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public char getGapCharacter(); - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! 
- */ - public Vector getAAFrequency(); -} +/* + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9) + * Copyright (C) 2015 The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.datamodel; + +import java.util.Hashtable; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Data structure to hold and manipulate a multiple sequence alignment + */ +public interface AlignmentI extends AnnotatedCollectionI +{ + /** + * Calculates the number of sequences in an alignment + * + * @return Number of sequences in alignment + */ + int getHeight(); + + /** + * + * Calculates the maximum width of the alignment, including gaps. + * + * @return Greatest sequence length within alignment. 
+ */ + @Override + int getWidth(); + + /** + * Calculates if this set of sequences (visible and invisible) are all the + * same length + * + * @return true if all sequences in alignment are the same length + */ + boolean isAligned(); + + /** + * Calculates if this set of sequences is all the same length + * + * @param includeHidden + * optionally exclude hidden sequences from test + * @return true if all (or just visible) sequences are the same length + */ + boolean isAligned(boolean includeHidden); + + /** + * Gets sequences as a Synchronized collection + * + * @return All sequences in alignment. + */ + @Override + List getSequences(); + + /** + * Gets sequences as a SequenceI[] + * + * @return All sequences in alignment. + */ + SequenceI[] getSequencesArray(); + + /** + * Find a specific sequence in this alignment. + * + * @param i + * Index of required sequence. + * + * @return SequenceI at given index. + */ + SequenceI getSequenceAt(int i); + + /** + * Returns a map of lists of sequences keyed by sequence name. + * + * @return map from sequence name to the list of sequences with that name + */ + Map<String, List<SequenceI>> getSequencesByName(); + + /** + * Add a new sequence to this alignment. + * + * @param seq + * New sequence will be added at end of alignment. + */ + void addSequence(SequenceI seq); + + /** + * Used to set a particular index of the alignment with the given sequence. + * + * @param i + * Index of sequence to be updated. + * @param seq + * New sequence to be inserted. + */ + void setSequenceAt(int i, SequenceI seq); + + /** + * Deletes a sequence from the alignment + * + * @param s + * Sequence to be deleted. + */ + void deleteSequence(SequenceI s); + + /** + * Deletes a sequence from the alignment. + * + * @param i + * Index of sequence to be deleted. + */ + void deleteSequence(int i); + + /** + * Finds sequence in alignment using sequence name as query. + * + * @param name + * Id of sequence to search for. + * + * @return Sequence matching query, if found. If not found returns null. 
+ */ + SequenceI findName(String name); + + SequenceI[] findSequenceMatch(String name); + + /** + * Finds index of a given sequence in the alignment. + * + * @param s + * Sequence to look for. + * + * @return Index of sequence within the alignment or -1 if not found + */ + int findIndex(SequenceI s); + + /** + * Finds group that given sequence is part of. + * + * @param s + * Sequence in alignment. + * + * @return First group found for sequence. WARNING : Sequences may be members + * of several groups. This method is incomplete. + */ + SequenceGroup findGroup(SequenceI s); + + /** + * Finds all groups that a given sequence is part of. + * + * @param s + * Sequence in alignment. + * + * @return All groups containing given sequence. + */ + SequenceGroup[] findAllGroups(SequenceI s); + + /** + * Adds a new SequenceGroup to this alignment. + * + * @param sg + * New group to be added. + */ + void addGroup(SequenceGroup sg); + + /** + * Deletes a specific SequenceGroup + * + * @param g + * Group will be deleted from alignment. + */ + void deleteGroup(SequenceGroup g); + + /** + * Get all the groups associated with this alignment. + * + * @return All groups as a list. + */ + List getGroups(); + + /** + * Deletes all groups from this alignment. + */ + void deleteAllGroups(); + + /** + * Adds a new AlignmentAnnotation to this alignment + * + * @note Care should be taken to ensure that annotation is at least as wide as + * the longest sequence in the alignment for rendering purposes. + */ + void addAnnotation(AlignmentAnnotation aa); + + /** + * moves annotation to a specified index in alignment annotation display stack + * + * @param aa + * the annotation object to be moved + * @param index + * the destination position + */ + void setAnnotationIndex(AlignmentAnnotation aa, int index); + + /** + * Delete all annotations, including auto-calculated if the flag is set true. + * Returns true if at least one annotation was deleted, else false. 
+ * + * @param includingAutoCalculated + * @return + */ + boolean deleteAllAnnotations(boolean includingAutoCalculated); + + /** + * Deletes a specific AlignmentAnnotation from the alignment, and removes its + * reference from any SequenceI or SequenceGroup object's annotation if and + * only if aa is contained within the alignment's annotation vector. + * Otherwise, it will do nothing. + * + * @param aa + * the annotation to delete + * @return true if annotation was deleted from this alignment. + */ + boolean deleteAnnotation(AlignmentAnnotation aa); + + /** + * Deletes a specific AlignmentAnnotation from the alignment, and optionally + * removes any reference from any SequenceI or SequenceGroup object's + * annotation if and only if aa is contained within the alignment's annotation + * vector. Otherwise, it will do nothing. + * + * @param aa + * the annotation to delete + * @param unhook + * flag indicating if any references should be removed from + * annotation - use this if you intend to add the annotation back + * into the alignment + * @return true if annotation was deleted from this alignment. + */ + boolean deleteAnnotation(AlignmentAnnotation aa, boolean unhook); + + /** + * Get the annotation associated with this alignment (this can be null if no + * annotation has ever been created on the alignment) + * + * @return array of AlignmentAnnotation objects + */ + @Override + AlignmentAnnotation[] getAlignmentAnnotation(); + + /** + * Change the gap character used in this alignment to 'gc' + * + * @param gc + * the new gap character. 
+ */ + void setGapCharacter(char gc); + + /** + * Get the gap character used in this alignment + * + * @return gap character + */ + char getGapCharacter(); + + /** + * Test for all nucleotide alignment + * + * @return true if alignment is nucleotide sequence + */ + boolean isNucleotide(); + + /** + * Test if alignment contains RNA structure + * + * @return true if RNA structure AlignmentAnnotation was added to alignment + */ + boolean hasRNAStructure(); + + /** + * Set alignment to be a nucleotide sequence + * + */ + void setNucleotide(boolean b); + + /** + * Get the associated dataset for the alignment. + * + * @return Alignment containing dataset sequences or null if this is a + * dataset. + */ + Alignment getDataset(); + + /** + * Set the associated dataset for the alignment, or create one. + * + * @param dataset + * The dataset alignment or null to construct one. + */ + void setDataset(Alignment dataset); + + /** + * pads sequences with gaps (to ensure the set looks like an alignment) + * + * @return boolean true if alignment was modified + */ + boolean padGaps(); + + HiddenSequences getHiddenSequences(); + + /** + * Compact representation of alignment + * + * @return CigarArray + */ + CigarArray getCompactAlignment(); + + /** + * Set an arbitrary key value pair for an alignment. Note: both key and value + * objects should return a meaningful, human readable response to .toString() + * + * @param key + * @param value + */ + void setProperty(Object key, Object value); + + /** + * Get a named property from the alignment. + * + * @param key + * @return value of property + */ + Object getProperty(Object key); + + /** + * Get the property hashtable. 
+ * + * @return hashtable of alignment properties (or null if none are defined) + */ + Hashtable getProperties(); + + /** + * add a reference to a frame of aligned codons for this alignment + * + * @param codons + */ + void addCodonFrame(AlignedCodonFrame codons); + + /** + * remove a particular codon frame reference from this alignment + * + * @param codons + * @return true if codon frame was removed. + */ + boolean removeCodonFrame(AlignedCodonFrame codons); + + /** + * get all codon frames associated with this alignment + * + * @return + */ + Set getCodonFrames(); + + /** + * Set the codon frame mappings (replacing any existing set). + */ + void setCodonFrames(Set acfs); + + /** + * get codon frames involving sequenceI + */ + List getCodonFrame(SequenceI seq); + + /** + * find sequence with given name in alignment + * + * @param token + * name to find + * @param b + * true implies that case insensitive matching will also be + * tried + * @return matched sequence or null + */ + SequenceI findName(String token, boolean b); + + /** + * find next sequence with given name in alignment starting after a given + * sequence + * + * @param startAfter + * the sequence after which the search will be started (usually the + * result of the last call to findName) + * @param token + * name to find + * @param b + * true implies that case insensitive matching will also be + * tried + * @return matched sequence or null + */ + SequenceI findName(SequenceI startAfter, String token, boolean b); + + /** + * find first sequence in alignment which is involved in the given search + * result object + * + * @param results + * @return -1 or index of sequence in alignment + */ + int findIndex(SearchResults results); + + /** + * append sequences and annotation from another alignment object to this one. + * Note: this is a straight transfer of object references, and may result in + * toappend's dependent data being transformed to fit the alignment (changing + * gap characters, etc...). 
If you are uncertain, use the Alignment copy + * constructor to create a new version which can be appended without side + * effect. + * + * @param toappend + * - the alignment to be appended. + */ + void append(AlignmentI toappend); + + /** + * Justify the sequences to the left or right by deleting and inserting gaps + * before the initial residue or after the terminal residue + * + * @param right + * true if alignment padded to right, false to justify to left + * @return true if alignment was changed TODO: return undo object + */ + boolean justify(boolean right); + + /** + * add given annotation row at given position (0 is start, -1 is end) + * + * @param consensus + * @param i + */ + void addAnnotation(AlignmentAnnotation consensus, int i); + + /** + * search for or create a specific annotation row on the alignment + * + * @param name + * name for annotation (must match) + * @param calcId + * calcId for the annotation (null or must match) + * @param autoCalc + * - value of autocalc flag for the annotation + * @param seqRef + * - null or specific sequence reference + * @param groupRef + * - null or specific group reference + * @param method + * - CalcId for the annotation (must match) + * + * @return existing annotation matching the given attributes + */ + AlignmentAnnotation findOrCreateAnnotation(String name, String calcId, + boolean autoCalc, SequenceI seqRef, SequenceGroup groupRef); + + /** + * move the given group up or down in the alignment by the given number of + * rows. Implementor assumes given group is already present on alignment - no + * recalculations are triggered. 
+ * + * @param sg + * @param map + * @param up + * @param i + */ + void moveSelectedSequencesByOne(SequenceGroup sg, + Map map, boolean up); + + /** + * validate annotation after an edit and update any alignment state flags + * accordingly + * + * @param alignmentAnnotation + */ + void validateAnnotation(AlignmentAnnotation alignmentAnnotation); + + /** + * Align this alignment the same as the given one. If both of the same type + * (nucleotide/protein) then align both identically. If this is nucleotide and + * the other is protein, make 3 gaps for each gap in the protein sequences. If + * this is protein and the other is nucleotide, insert a gap for each 3 gaps + * (or part thereof) between nucleotide bases. Returns the number of mapped + * sequences that were realigned. + * + * @param al + * @return + */ + int alignAs(AlignmentI al); + + /** + * Returns the set of distinct sequence names in the alignment. + * + * @return + */ + Set getSequenceNames(); + + /** + * Checks if the alignment has at least one sequence with one non-gapped + * residue + * + * @return + */ + public boolean hasValidSequence(); +}