X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FSequence.java;h=b0969b5cdf686aa08a809435e17a65707d2a1bd9;hb=8c8433dbbca18eb7955537a8756fba92d044dd41;hp=8928425f607b54b4cff148bc8d93d3b19ec9e921;hpb=58d43be1e7c0bcd32c567ec9dfafc3abee45e20c;p=jalview.git
diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java
index 8928425..b0969b5 100755
--- a/src/jalview/datamodel/Sequence.java
+++ b/src/jalview/datamodel/Sequence.java
@@ -1,200 +1,1175 @@
-package jalview.datamodel;
-
-import jalview.analysis.*;
-import java.awt.*;
-import java.util.*;
-import MCview.*;
-
-
-public class Sequence implements SequenceI
-{
- protected String name;
- protected String sequence;
- protected String description;
- protected int start;
- protected int end;
- protected String displayId;
- protected Color color = Color.white;
- String pdbId;
-
- public Vector sequenceFeatures = new Vector();
- public void setSequenceFeatures(Vector v)
- {
- sequenceFeatures = v;
- }
-
- public Vector getSequenceFeatures()
- {return sequenceFeatures; }
-
- public void setPDBId(String id)
- {
- pdbId = id;
- }
- public String getPDBId()
- {
- return pdbId;
- }
-
-
- public Sequence(String name, String sequence, int start, int end)
- {
-
- this.name = name;
- this.sequence = sequence;
- this.start = start;
- this.end = end;
-
- setDisplayId();
-
- }
-
- public Sequence(String name,String sequence) {
- this(name,sequence,1,sequence.length());
- }
- public Sequence(SequenceI seq) {
- this(seq.getName(),seq.getSequence(),seq.getStart(),seq.getEnd());
- }
- public String getDisplayId() {
- return displayId;
- }
- public void setDisplayId() {
- displayId = name + "/" + start + "-" + end;
- }
- public void setName(String name) {
- this.name = name;
- setDisplayId();
- }
- public String getName() {
- return this.name;
- }
- public void setStart(int start) {
- this.start = start;
- setDisplayId();
- }
- public int getStart() {
- return this.start;
- }
- public void setEnd(int end) {
- this.end = end;
- setDisplayId();
- }
- public int getEnd() {
- return this.end;
- }
- public int getLength() {
- return this.sequence.length();
- }
- public void setSequence(String seq) {
- this.sequence = seq;
- }
- public String getSequence() {
- return this.sequence;
- }
- public String getSequence(int start,int end) {
- if(end>sequence.length())
- end = sequence.length();
- return this.sequence.substring(start,end);
- }
-
- public char getCharAt(int i) {
- if (i < sequence.length()) {
- return sequence.charAt(i);
- } else {
- return ' ';
- }
- }
- public void setDescription(String desc) {
- this.description = desc;
- }
- public String getDescription() {
- return this.description;
- }
-
- public int findIndex(int pos) {
- // returns the alignment position for a residue
- int j = start;
- int i = 0;
-
- while (i< sequence.length() && j <= end && j <= pos) {
-
- char c = sequence.charAt(i);
-
- if (!jalview.util.Comparison.isGap((c)))
- j++;
-
- i++;
- }
- if (j == end && j < pos)
- return end+1;
- else
- return i;
-
- }
-
- public int findPosition(int i) {
- // Returns the sequence position for an alignment position
- int j = 0;
- int pos = start;
-
- while (j < i && j.
+ */
+package jalview.datamodel;
+
+import java.util.*;
+
+import jalview.analysis.*;
+
+/**
+ *
+ * Implements the SequenceI interface for a char[] based sequence object.
+ *
+ * @author $author$
+ * @version $Revision$
+ */
+public class Sequence implements SequenceI
+{
+ SequenceI datasetSequence;
+
+ String name;
+
+ private char[] sequence;
+
+ String description;
+
+ int start;
+
+ int end;
+
+ Vector pdbIds;
+
+ String vamsasId;
+
+ DBRefEntry[] dbrefs;
+
+ /**
+ * This annotation is displayed below the alignment but the positions are tied
+ * to the residues of this sequence
+ */
+ Vector annotation;
+
+ /** array of seuqence features - may not be null for a valid sequence object */
+ public SequenceFeature[] sequenceFeatures;
+
+ /**
+ * Creates a new Sequence object.
+ *
+ * @param name
+ * display name string
+ * @param sequence
+ * string to form a possibly gapped sequence out of
+ * @param start
+ * first position of non-gap residue in the sequence
+ * @param end
+ * last position of ungapped residues (nearly always only used for
+ * display purposes)
+ */
+ public Sequence(String name, String sequence, int start, int end)
+ {
+ this.name = name;
+ this.sequence = sequence.toCharArray();
+ this.start = start;
+ this.end = end;
+ parseId();
+ checkValidRange();
+ }
+
+ public Sequence(String name, char[] sequence, int start, int end)
+ {
+ this.name = name;
+ this.sequence = sequence;
+ this.start = start;
+ this.end = end;
+ parseId();
+ checkValidRange();
+ }
+
+ com.stevesoft.pat.Regex limitrx = new com.stevesoft.pat.Regex(
+ "[/][0-9]{1,}[-][0-9]{1,}$");
+
+ com.stevesoft.pat.Regex endrx = new com.stevesoft.pat.Regex("[0-9]{1,}$");
+
+ void parseId()
+ {
+ if (name == null)
+ {
+ System.err
+ .println("POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
+ name = "";
+ }
+ // Does sequence have the /start-end signiature?
+ if (limitrx.search(name))
+ {
+ name = limitrx.left();
+ endrx.search(limitrx.stringMatched());
+ setStart(Integer.parseInt(limitrx.stringMatched().substring(1,
+ endrx.matchedFrom() - 1)));
+ setEnd(Integer.parseInt(endrx.stringMatched()));
+ }
+ }
+
+ void checkValidRange()
+ {
+ // Note: JAL-774 : http://issues.jalview.org/browse/JAL-774?focusedCommentId=11239&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-11239
+ {
+ int endRes = 0;
+ for (int j = 0; j < sequence.length; j++)
+ {
+ if (!jalview.util.Comparison.isGap(sequence[j]))
+ {
+ endRes++;
+ }
+ }
+ if (endRes > 0)
+ {
+ endRes += start - 1;
+ }
+
+ if (end= sequence.length)
+ {
+ return new char[0];
+ }
+
+ if (end >= sequence.length)
+ {
+ end = sequence.length;
+ }
+
+ char[] reply = new char[end - start];
+ System.arraycopy(sequence, start, reply, 0, end - start);
+
+ return reply;
+ }
+
+ /**
+ * make a new Sequence object from start to end (including gaps) over this
+ * seqeunce
+ *
+ * @param start
+ * int
+ * @param end
+ * int
+ * @return SequenceI
+ */
+ public SequenceI getSubSequence(int start, int end)
+ {
+ if (start < 0)
+ {
+ start = 0;
+ }
+ char[] seq = getSequence(start, end);
+ if (seq.length == 0)
+ {
+ return null;
+ }
+ int nstart = findPosition(start);
+ int nend = findPosition(end) - 1;
+ // JBPNote - this is an incomplete copy.
+ SequenceI nseq = new Sequence(this.getName(), seq, nstart, nend);
+ nseq.setDescription(description);
+ if (datasetSequence != null)
+ {
+ nseq.setDatasetSequence(datasetSequence);
+ }
+ else
+ {
+ nseq.setDatasetSequence(this);
+ }
+ return nseq;
+ }
+
+ /**
+ * DOCUMENT ME!
+ *
+ * @param i
+ * DOCUMENT ME!
+ *
+ * @return DOCUMENT ME!
+ */
+ public char getCharAt(int i)
+ {
+ if (i < sequence.length)
+ {
+ return sequence[i];
+ }
+ else
+ {
+ return ' ';
+ }
+ }
+
+ /**
+ * DOCUMENT ME!
+ *
+ * @param desc
+ * DOCUMENT ME!
+ */
+ public void setDescription(String desc)
+ {
+ this.description = desc;
+ }
+
+ /**
+ * DOCUMENT ME!
+ *
+ * @return DOCUMENT ME!
+ */
+ public String getDescription()
+ {
+ return this.description;
+ }
+
+ /**
+ * Return the alignment position for a sequence position
+ *
+ * @param pos
+ * lying from start to end
+ *
+ * @return aligned position of residue pos
+ */
+ public int findIndex(int pos)
+ {
+ // returns the alignment position for a residue
+ int j = start;
+ int i = 0;
+ // Rely on end being at least as long as the length of the sequence.
+ while ((i < sequence.length) && (j <= end) && (j <= pos))
+ {
+ if (!jalview.util.Comparison.isGap(sequence[i]))
+ {
+ j++;
+ }
+
+ i++;
+ }
+
+ if ((j == end) && (j < pos))
+ {
+ return end + 1;
+ }
+ else
+ {
+ return i;
+ }
+ }
+
+ /**
+ * Returns the sequence position for an alignment position
+ *
+ * @param i
+ * column index in alignment (from 1)
+ *
+ * @return residue number for residue (left of and) nearest ith column
+ */
+ public int findPosition(int i)
+ {
+ int j = 0;
+ int pos = start;
+ int seqlen = sequence.length;
+ while ((j < i) && (j < seqlen))
+ {
+ if (!jalview.util.Comparison.isGap(sequence[j]))
+ {
+ pos++;
+ }
+
+ j++;
+ }
+
+ return pos;
+ }
+
+ /**
+ * Returns an int array where indices correspond to each residue in the
+ * sequence and the element value gives its position in the alignment
+ *
+ * @return int[SequenceI.getEnd()-SequenceI.getStart()+1] or null if no
+ * residues in SequenceI object
+ */
+ public int[] gapMap()
+ {
+ String seq = jalview.analysis.AlignSeq.extractGaps(
+ jalview.util.Comparison.GapChars, new String(sequence));
+ int[] map = new int[seq.length()];
+ int j = 0;
+ int p = 0;
+
+ while (j < sequence.length)
+ {
+ if (!jalview.util.Comparison.isGap(sequence[j]))
+ {
+ map[p++] = j;
+ }
+
+ j++;
+ }
+
+ return map;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see jalview.datamodel.SequenceI#findPositionMap()
+ */
+ public int[] findPositionMap()
+ {
+ int map[] = new int[sequence.length];
+ int j = 0;
+ int pos = start;
+ int seqlen = sequence.length;
+ while ((j < seqlen))
+ {
+ map[j] = pos;
+ if (!jalview.util.Comparison.isGap(sequence[j]))
+ {
+ pos++;
+ }
+
+ j++;
+ }
+ return map;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see jalview.datamodel.SequenceI#deleteChars(int, int)
+ */
+ public void deleteChars(int i, int j)
+ {
+ int newstart = start, newend = end;
+ if (i >= sequence.length)
+ {
+ return;
+ }
+
+ char[] tmp;
+
+ if (j >= sequence.length)
+ {
+ tmp = new char[i];
+ System.arraycopy(sequence, 0, tmp, 0, i);
+ }
+ else
+ {
+ tmp = new char[sequence.length - j + i];
+ System.arraycopy(sequence, 0, tmp, 0, i);
+ System.arraycopy(sequence, j, tmp, i, sequence.length - j);
+ }
+ boolean createNewDs = false;
+ for (int s = i; s < j; s++)
+ {
+ if (jalview.schemes.ResidueProperties.aaIndex[sequence[s]] != 23)
+ {
+ if (createNewDs)
+ {
+ newend--;
+ }
+ else
+ {
+ int sindex = findIndex(start) - 1;
+ if (sindex == s)
+ {
+ // delete characters including start of sequence
+ newstart = findPosition(j);
+ break; // don't need to search for any more residue characters.
+ }
+ else
+ {
+ // delete characters after start.
+ int eindex = findIndex(end) - 1;
+ if (eindex < j)
+ {
+ // delete characters at end of sequence
+ newend = findPosition(i - 1);
+ break; // don't need to search for any more residue characters.
+ }
+ else
+ {
+ createNewDs = true;
+ newend--; // decrease end position by one for the deleted residue
+ // and search further
+ }
+ }
+ }
+ }
+ }
+ // deletion occured in the middle of the sequence
+ if (createNewDs && this.datasetSequence != null)
+ {
+ // construct a new sequence
+ Sequence ds = new Sequence(datasetSequence);
+ // TODO: remove any non-inheritable properties ?
+ // TODO: create a sequence mapping (since there is a relation here ?)
+ ds.deleteChars(i, j);
+ datasetSequence = ds;
+ }
+ start = newstart;
+ end = newend;
+ sequence = tmp;
+ }
+
+ /**
+ * DOCUMENT ME!
+ *
+ * @param i
+ * DOCUMENT ME!
+ * @param c
+ * DOCUMENT ME!
+ * @param chop
+ * DOCUMENT ME!
+ */
+ public void insertCharAt(int i, int length, char c)
+ {
+ char[] tmp = new char[sequence.length + length];
+
+ if (i >= sequence.length)
+ {
+ System.arraycopy(sequence, 0, tmp, 0, sequence.length);
+ i = sequence.length;
+ }
+ else
+ {
+ System.arraycopy(sequence, 0, tmp, 0, i);
+ }
+
+ int index = i;
+ while (length > 0)
+ {
+ tmp[index++] = c;
+ length--;
+ }
+
+ if (i < sequence.length)
+ {
+ System.arraycopy(sequence, i, tmp, index, sequence.length - i);
+ }
+
+ sequence = tmp;
+ }
+
+ public void insertCharAt(int i, char c)
+ {
+ insertCharAt(i, 1, c);
+ }
+
+ public String getVamsasId()
+ {
+ return vamsasId;
+ }
+
+ public void setVamsasId(String id)
+ {
+ vamsasId = id;
+ }
+
+ public void setDBRef(DBRefEntry[] dbref)
+ {
+ dbrefs = dbref;
+ }
+
+ public DBRefEntry[] getDBRef()
+ {
+ if (dbrefs == null && datasetSequence != null
+ && this != datasetSequence)
+ {
+ return datasetSequence.getDBRef();
+ }
+ return dbrefs;
+ }
+
+ public void addDBRef(DBRefEntry entry)
+ {
+ if (dbrefs == null)
+ {
+ dbrefs = new DBRefEntry[0];
+ }
+
+ int i, iSize = dbrefs.length;
+
+ for (i = 0; i < iSize; i++)
+ {
+ if (dbrefs[i].equalRef(entry))
+ {
+ if (entry.getMap() != null)
+ {
+ if (dbrefs[i].getMap() == null)
+ {
+ // overwrite with 'superior' entry that contains a mapping.
+ dbrefs[i] = entry;
+ }
+ }
+ return;
+ }
+ }
+
+ DBRefEntry[] temp = new DBRefEntry[iSize + 1];
+ System.arraycopy(dbrefs, 0, temp, 0, iSize);
+ temp[temp.length - 1] = entry;
+
+ dbrefs = temp;
+ }
+
+ public void setDatasetSequence(SequenceI seq)
+ {
+ datasetSequence = seq;
+ }
+
+ public SequenceI getDatasetSequence()
+ {
+ return datasetSequence;
+ }
+
+ public AlignmentAnnotation[] getAnnotation()
+ {
+ if (annotation == null)
+ {
+ return null;
+ }
+
+ AlignmentAnnotation[] ret = new AlignmentAnnotation[annotation.size()];
+ for (int r = 0; r < ret.length; r++)
+ {
+ ret[r] = (AlignmentAnnotation) annotation.elementAt(r);
+ }
+
+ return ret;
+ }
+
+ public void addAlignmentAnnotation(AlignmentAnnotation annotation)
+ {
+ if (this.annotation == null)
+ {
+ this.annotation = new Vector();
+ }
+
+ this.annotation.addElement(annotation);
+ annotation.setSequenceRef(this);
+ }
+
+ public void removeAlignmentAnnotation(AlignmentAnnotation annotation)
+ {
+ if (this.annotation != null)
+ {
+ this.annotation.removeElement(annotation);
+ if (this.annotation.size() == 0)
+ this.annotation = null;
+ }
+ }
+
+ /**
+ * test if this is a valid candidate for another sequence's dataset sequence.
+ *
+ */
+ private boolean isValidDatasetSequence()
+ {
+ if (datasetSequence != null)
+ {
+ return false;
+ }
+ for (int i = 0; i < sequence.length; i++)
+ {
+ if (jalview.util.Comparison.isGap(sequence[i]))
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see jalview.datamodel.SequenceI#deriveSequence()
+ */
+ public SequenceI deriveSequence()
+ {
+ SequenceI seq = new Sequence(this);
+ if (datasetSequence != null)
+ {
+ // duplicate current sequence with same dataset
+ seq.setDatasetSequence(datasetSequence);
+ }
+ else
+ {
+ if (isValidDatasetSequence())
+ {
+ // Use this as dataset sequence
+ seq.setDatasetSequence(this);
+ }
+ else
+ {
+ // Create a new, valid dataset sequence
+ SequenceI ds = seq;
+ ds.setSequence(AlignSeq.extractGaps(
+ jalview.util.Comparison.GapChars, new String(sequence)));
+ setDatasetSequence(ds);
+ ds.setSequenceFeatures(getSequenceFeatures());
+ seq = this; // and return this sequence as the derived sequence.
+ }
+ }
+ return seq;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see jalview.datamodel.SequenceI#createDatasetSequence()
+ */
+ public SequenceI createDatasetSequence()
+ {
+ if (datasetSequence == null)
+ {
+ datasetSequence = new Sequence(getName(), AlignSeq.extractGaps(
+ jalview.util.Comparison.GapChars, getSequenceAsString()),
+ getStart(), getEnd());
+ datasetSequence.setSequenceFeatures(getSequenceFeatures());
+ datasetSequence.setDescription(getDescription());
+ setSequenceFeatures(null);
+ // move database references onto dataset sequence
+ datasetSequence.setDBRef(getDBRef());
+ setDBRef(null);
+ datasetSequence.setPDBId(getPDBId());
+ setPDBId(null);
+ datasetSequence.updatePDBIds();
+ }
+ return datasetSequence;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * jalview.datamodel.SequenceI#setAlignmentAnnotation(AlignmmentAnnotation[]
+ * annotations)
+ */
+ public void setAlignmentAnnotation(AlignmentAnnotation[] annotations)
+ {
+ if (annotation != null)
+ {
+ annotation.removeAllElements();
+ }
+ if (annotations != null)
+ {
+ for (int i = 0; i < annotations.length; i++)
+ {
+ if (annotations[i] != null)
+ addAlignmentAnnotation(annotations[i]);
+ }
+ }
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see jalview.datamodel.SequenceI#getAnnotation(java.lang.String)
+ */
+ public AlignmentAnnotation[] getAnnotation(String label)
+ {
+ if (annotation == null || annotation.size() == 0)
+ {
+ return null;
+ }
+
+ Vector subset = new Vector();
+ Enumeration e = annotation.elements();
+ while (e.hasMoreElements())
+ {
+ AlignmentAnnotation ann = (AlignmentAnnotation) e.nextElement();
+ if (ann.label != null && ann.label.equals(label))
+ {
+ subset.addElement(ann);
+ }
+ }
+ if (subset.size() == 0)
+ {
+ return null;
+ }
+ AlignmentAnnotation[] anns = new AlignmentAnnotation[subset.size()];
+ int i = 0;
+ e = subset.elements();
+ while (e.hasMoreElements())
+ {
+ anns[i++] = (AlignmentAnnotation) e.nextElement();
+ }
+ subset.removeAllElements();
+ return anns;
+ }
+
+ public boolean updatePDBIds()
+ {
+ if (datasetSequence != null)
+ {
+ // TODO: could merge DBRefs
+ return datasetSequence.updatePDBIds();
+ }
+ if (dbrefs == null || dbrefs.length == 0)
+ {
+ return false;
+ }
+ Vector newpdb = new Vector();
+ for (int i = 0; i < dbrefs.length; i++)
+ {
+ if (DBRefSource.PDB.equals(dbrefs[i].getSource()))
+ {
+ PDBEntry pdbe = new PDBEntry();
+ pdbe.setId(dbrefs[i].getAccessionId());
+ if (pdbIds == null || pdbIds.size() == 0)
+ {
+ newpdb.addElement(pdbe);
+ }
+ else
+ {
+ Enumeration en = pdbIds.elements();
+ boolean matched = false;
+ while (!matched && en.hasMoreElements())
+ {
+ PDBEntry anentry = (PDBEntry) en.nextElement();
+ if (anentry.getId().equals(pdbe.getId()))
+ {
+ matched = true;
+ }
+ }
+ if (!matched)
+ {
+ newpdb.addElement(pdbe);
+ }
+ }
+ }
+ }
+ if (newpdb.size() > 0)
+ {
+ Enumeration en = newpdb.elements();
+ while (en.hasMoreElements())
+ {
+ addPDBId((PDBEntry) en.nextElement());
+ }
+ return true;
+ }
+ return false;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * jalview.datamodel.SequenceI#transferAnnotation(jalview.datamodel.SequenceI,
+ * jalview.datamodel.Mapping)
+ */
+ public void transferAnnotation(SequenceI entry, Mapping mp)
+ {
+ if (datasetSequence != null)
+ {
+ datasetSequence.transferAnnotation(entry, mp);
+ return;
+ }
+ if (entry.getDatasetSequence() != null)
+ {
+ transferAnnotation(entry.getDatasetSequence(), mp);
+ return;
+ }
+ // transfer any new features from entry onto sequence
+ if (entry.getSequenceFeatures() != null)
+ {
+
+ SequenceFeature[] sfs = entry.getSequenceFeatures();
+ for (int si = 0; si < sfs.length; si++)
+ {
+ SequenceFeature sf[] = (mp != null) ? mp.locateFeature(sfs[si])
+ : new SequenceFeature[]
+ { new SequenceFeature(sfs[si]) };
+ if (sf != null && sf.length > 0)
+ {
+ for (int sfi = 0; sfi < sf.length; sfi++)
+ {
+ addSequenceFeature(sf[sfi]);
+ }
+ }
+ }
+ }
+
+ // transfer PDB entries
+ if (entry.getPDBId() != null)
+ {
+ Enumeration e = entry.getPDBId().elements();
+ while (e.hasMoreElements())
+ {
+ PDBEntry pdb = (PDBEntry) e.nextElement();
+ addPDBId(pdb);
+ }
+ }
+ // transfer database references
+ DBRefEntry[] entryRefs = entry.getDBRef();
+ if (entryRefs != null)
+ {
+ for (int r = 0; r < entryRefs.length; r++)
+ {
+ DBRefEntry newref = new DBRefEntry(entryRefs[r]);
+ if (newref.getMap() != null && mp != null)
+ {
+ // remap ref using our local mapping
+ }
+ // we also assume all version string setting is done by dbSourceProxy
+ /*
+ * if (!newref.getSource().equalsIgnoreCase(dbSource)) {
+ * newref.setSource(dbSource); }
+ */
+ addDBRef(newref);
+ }
+ }
+ }
+
+}