/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
21 package jalview.datamodel;
23 import jalview.datamodel.features.FeatureLocationI;
25 import java.util.HashMap;
27 import java.util.Map.Entry;
28 import java.util.Vector;
36 public class SequenceFeature implements FeatureLocationI
39 * score value if none is set; preferably Float.Nan, but see
40 * JAL-2060 and JAL-2554 for a couple of blockers to that
42 private static final float NO_SCORE = 0f;
44 private static final String STATUS = "status";
46 private static final String STRAND = "STRAND";
48 // private key for Phase designed not to conflict with real GFF data
49 private static final String PHASE = "!Phase";
51 // private key for ENA location designed not to conflict with real GFF data
52 private static final String LOCATION = "!Location";
55 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
56 * name1=value1;name2=value2,value3;...etc
58 private static final String ATTRIBUTES = "ATTRIBUTES";
61 * type, begin, end, featureGroup are final to ensure that
62 * the integrity of SequenceFeatures data store can't be
63 * broken by direct update of these fields
65 public final String type;
67 public final int begin;
71 public final String featureGroup;
75 public String description;
78 * a map of key-value pairs; may be populated from GFF 'column 9' data,
79 * other data sources (e.g. GenBank file), or programmatically
81 public Map<String, Object> otherDetails;
83 public Vector<String> links;
/**
 * Constructs a duplicate feature. Note: makes a shallow copy of the
 * otherDetails map, so the new and original SequenceFeature may reference the
 * same objects in the map.
 * 
 * @param cpy
 *          the feature to duplicate
 */
public SequenceFeature(SequenceFeature cpy)
{
  this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup());
}
/**
 * Constructor for a feature without a score; the score is set to NO_SCORE.
 * 
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, String group)
{
  this.type = theType;
  this.description = theDesc;
  this.begin = theBegin;
  this.end = theEnd;
  this.featureGroup = group;
  this.score = NO_SCORE;
}
/**
 * Constructor including a score value
 * 
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param theScore
 *          the score to record for the feature
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, float theScore, String group)
{
  this(theType, theDesc, theBegin, theEnd, group);
  // overrides the NO_SCORE default set by the delegate constructor
  this.score = theScore;
}
/**
 * A copy constructor that allows the begin and end positions and group to be
 * modified. The otherDetails map and the links are copied shallowly: the new
 * feature gets its own collections, holding the same value objects.
 * 
 * @param sf
 *          the feature to copy
 * @param newBegin
 *          begin position for the new feature
 * @param newEnd
 *          end position for the new feature
 * @param newGroup
 *          feature group for the new feature
 */
public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
        String newGroup)
{
  this(sf.getType(), newBegin, newEnd, newGroup);

  // equals() compares score, so a copy has to carry it over
  score = sf.score;
  description = sf.description;
  if (sf.otherDetails != null)
  {
    otherDetails = new HashMap<String, Object>(sf.otherDetails);
  }
  // links stays null when the source has no links
  if (sf.links != null && !sf.links.isEmpty())
  {
    links = new Vector<String>(sf.links);
  }
}
/**
 * Constructor that sets the final fields type, begin, end, group
 * 
 * @param theType
 *          the feature type
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param theGroup
 *          the feature group
 */
private SequenceFeature(String theType, int theBegin, int theEnd,
        String theGroup)
{
  type = theType;
  begin = theBegin;
  end = theEnd;
  featureGroup = theGroup;
}
185 * Two features are considered equal if they have the same type, group,
186 * description, start, end, phase, strand, and (if present) 'Name', ID' and
187 * 'Parent' attributes.
189 * Note we need to check Parent to distinguish the same exon occurring in
190 * different transcripts (in Ensembl GFF). This allows assembly of transcript
191 * sequences from their component exon regions.
194 public boolean equals(Object o)
196 return equals(o, false);
200 * Overloaded method allows the equality test to optionally ignore the
201 * 'Parent' attribute of a feature. This supports avoiding adding many
202 * superficially duplicate 'exon' or CDS features to genomic or protein
206 * @param ignoreParent
209 public boolean equals(Object o, boolean ignoreParent)
211 if (o == null || !(o instanceof SequenceFeature))
216 SequenceFeature sf = (SequenceFeature) o;
217 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
219 if (begin != sf.begin || end != sf.end || !sameScore)
224 if (getStrand() != sf.getStrand())
229 if (!(type + description + featureGroup + getPhase()).equals(sf.type
230 + sf.description + sf.featureGroup + sf.getPhase()))
234 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
238 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
244 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
253 * Returns true if both values are null, are both non-null and equal
259 protected static boolean equalAttribute(Object att1, Object att2)
261 if (att1 == null && att2 == null)
267 return att1.equals(att2);
269 return att2.equals(att1);
275 * @return DOCUMENT ME!
278 public int getBegin()
286 * @return DOCUMENT ME!
297 * @return DOCUMENT ME!
299 public String getType()
307 * @return DOCUMENT ME!
309 public String getDescription()
314 public void setDescription(String desc)
319 public String getFeatureGroup()
324 public void addLink(String labelLink)
328 links = new Vector<String>();
331 if (!links.contains(labelLink))
333 links.insertElementAt(labelLink, 0);
337 public float getScore()
342 public void setScore(float value)
348 * Used for getting values which are not in the basic set. eg STRAND, PHASE
354 public Object getValue(String key)
356 if (otherDetails == null)
362 return otherDetails.get(key);
367 * Returns a property value for the given key if known, else the specified
371 * @param defaultValue
374 public Object getValue(String key, Object defaultValue)
376 Object value = getValue(key);
377 return value == null ? defaultValue : value;
381 * Used for setting values which are not in the basic set. eg STRAND, FRAME
389 public void setValue(String key, Object value)
393 if (otherDetails == null)
395 otherDetails = new HashMap<String, Object>();
398 otherDetails.put(key, value);
403 * The following methods are added to maintain the castor Uniprot mapping file
406 public void setStatus(String status)
408 setValue(STATUS, status);
411 public String getStatus()
413 return (String) getValue(STATUS);
416 public void setAttributes(String attr)
418 setValue(ATTRIBUTES, attr);
421 public String getAttributes()
423 return (String) getValue(ATTRIBUTES);
427 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
428 * GFF), and 0 for unknown or not (validly) specified
432 public int getStrand()
435 if (otherDetails != null)
437 Object str = otherDetails.get(STRAND);
442 else if ("+".equals(str))
451 * Set the value of strand
454 * should be "+" for forward, or "-" for reverse
456 public void setStrand(String strand)
458 setValue(STRAND, strand);
461 public void setPhase(String phase)
463 setValue(PHASE, phase);
466 public String getPhase()
468 return (String) getValue(PHASE);
472 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
476 public void setEnaLocation(String loc)
478 setValue(LOCATION, loc);
482 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
486 public String getEnaLocation()
488 return (String) getValue(LOCATION);
492 * Readable representation, for debug only, not guaranteed not to change
496 public String toString()
498 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
503 * Overridden to ensure that whenever two objects are equal, they have the
507 public int hashCode()
509 String s = getType() + getDescription() + getFeatureGroup()
510 + getValue("ID") + getValue("Name") + getValue("Parent")
512 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
517 * Answers true if the feature's start/end values represent two related
518 * positions, rather than ends of a range. Such features may be visualised or
519 * reported differently to features on a range.
522 public boolean isContactFeature()
524 // TODO abstract one day to a FeatureType class
525 if ("disulfide bond".equalsIgnoreCase(type)
526 || "disulphide bond".equalsIgnoreCase(type))
534 * Answers true if the sequence has zero start and end position
538 public boolean isNonPositional()
540 return begin == 0 && end == 0;