2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
import jalview.datamodel.features.FeatureLocationI;

import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Vector;
36 public class SequenceFeature implements FeatureLocationI
39 * score value if none is set; preferably Float.Nan, but see
40 * JAL-2060 and JAL-2554 for a couple of blockers to that
42 private static final float NO_SCORE = 0f;
44 private static final String STATUS = "status";
46 private static final String STRAND = "STRAND";
48 // private key for Phase designed not to conflict with real GFF data
49 private static final String PHASE = "!Phase";
51 // private key for ENA location designed not to conflict with real GFF data
52 private static final String LOCATION = "!Location";
55 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
56 * name1=value1;name2=value2,value3;...etc
58 private static final String ATTRIBUTES = "ATTRIBUTES";
61 * type, begin, end, featureGroup are final to ensure that
62 * the integrity of SequenceFeatures data store can't be
63 * broken by direct update of these fields
65 public final String type;
67 public final int begin;
71 public final String featureGroup;
73 public final float score;
75 public String description;
78 * a map of key-value pairs; may be populated from GFF 'column 9' data,
79 * other data sources (e.g. GenBank file), or programmatically
81 public Map<String, Object> otherDetails;
83 public Vector<String> links;
/**
 * Constructs a duplicate feature. Note: makes a shallow copy of the
 * otherDetails map, so the new and original SequenceFeature may reference the
 * same objects in the map.
 *
 * @param cpy
 *          the feature to duplicate
 */
public SequenceFeature(SequenceFeature cpy)
{
  // delegate to the copy constructor, passing the original's own values
  this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(),
          cpy.getScore());
}
/**
 * Constructs a feature with no score supplied; the score is set to NO_SCORE
 *
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, String group)
{
  // every final field has to be assigned exactly once here
  this.type = theType;
  this.description = theDesc;
  this.begin = theBegin;
  this.end = theEnd;
  this.featureGroup = group;
  this.score = NO_SCORE;
}
/**
 * Constructor including a score value
 *
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param theScore
 *          the feature score
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, float theScore, String group)
{
  // every final field has to be assigned exactly once here
  this.type = theType;
  this.description = theDesc;
  this.begin = theBegin;
  this.end = theEnd;
  this.featureGroup = group;
  this.score = theScore;
}
/**
 * A copy constructor that allows the value of final fields to be 'modified'.
 * Type and description are taken from the feature being copied. Its
 * otherDetails map and links list are copied shallowly: the new feature gets
 * its own containers holding the same entries.
 *
 * @param sf
 *          the feature to copy
 * @param newBegin
 *          begin position for the new feature
 * @param newEnd
 *          end position for the new feature
 * @param newGroup
 *          feature group for the new feature
 * @param newScore
 *          score for the new feature
 */
public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
        String newGroup, float newScore)
{
  this(sf.getType(), sf.getDescription(), newBegin, newEnd, newScore,
          newGroup);

  if (sf.otherDetails != null)
  {
    otherDetails = new HashMap<String, Object>(sf.otherDetails);
  }

  // an empty links list is not copied, leaving links null on the new feature
  if (sf.links != null && sf.links.size() > 0)
  {
    links = new Vector<String>(sf.links);
  }
}
173 * Two features are considered equal if they have the same type, group,
174 * description, start, end, phase, strand, and (if present) 'Name', ID' and
175 * 'Parent' attributes.
177 * Note we need to check Parent to distinguish the same exon occurring in
178 * different transcripts (in Ensembl GFF). This allows assembly of transcript
179 * sequences from their component exon regions.
182 public boolean equals(Object o)
184 return equals(o, false);
188 * Overloaded method allows the equality test to optionally ignore the
189 * 'Parent' attribute of a feature. This supports avoiding adding many
190 * superficially duplicate 'exon' or CDS features to genomic or protein
194 * @param ignoreParent
197 public boolean equals(Object o, boolean ignoreParent)
199 if (o == null || !(o instanceof SequenceFeature))
204 SequenceFeature sf = (SequenceFeature) o;
205 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
207 if (begin != sf.begin || end != sf.end || !sameScore)
212 if (getStrand() != sf.getStrand())
217 if (!(type + description + featureGroup + getPhase()).equals(sf.type
218 + sf.description + sf.featureGroup + sf.getPhase()))
222 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
226 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
232 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
241 * Returns true if both values are null, are both non-null and equal
247 protected static boolean equalAttribute(Object att1, Object att2)
249 if (att1 == null && att2 == null)
255 return att1.equals(att2);
257 return att2.equals(att1);
263 * @return the begin position of this feature
266 public int getBegin()
274 * @return DOCUMENT ME!
285 * @return the type of this feature
287 public String getType()
295 * @return the description of this feature
297 public String getDescription()
302 public void setDescription(String desc)
307 public String getFeatureGroup()
312 public void addLink(String labelLink)
316 links = new Vector<String>();
319 if (!links.contains(labelLink))
321 links.insertElementAt(labelLink, 0);
325 public float getScore()
331 * Used for getting values which are not in the basic set. eg STRAND, PHASE
337 public Object getValue(String key)
339 if (otherDetails == null)
345 return otherDetails.get(key);
350 * Returns a property value for the given key if known, else the specified
354 * @param defaultValue
357 public Object getValue(String key, Object defaultValue)
359 Object value = getValue(key);
360 return value == null ? defaultValue : value;
364 * Used for setting values which are not in the basic set. eg STRAND, FRAME
372 public void setValue(String key, Object value)
376 if (otherDetails == null)
378 otherDetails = new HashMap<String, Object>();
381 otherDetails.put(key, value);
386 * The following methods are added to maintain the castor Uniprot mapping file
389 public void setStatus(String status)
391 setValue(STATUS, status);
394 public String getStatus()
396 return (String) getValue(STATUS);
399 public void setAttributes(String attr)
401 setValue(ATTRIBUTES, attr);
404 public String getAttributes()
406 return (String) getValue(ATTRIBUTES);
410 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
411 * GFF), and 0 for unknown or not (validly) specified
415 public int getStrand()
418 if (otherDetails != null)
420 Object str = otherDetails.get(STRAND);
425 else if ("+".equals(str))
434 * Set the value of strand
437 * should be "+" for forward, or "-" for reverse
439 public void setStrand(String strand)
441 setValue(STRAND, strand);
444 public void setPhase(String phase)
446 setValue(PHASE, phase);
449 public String getPhase()
451 return (String) getValue(PHASE);
455 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
459 public void setEnaLocation(String loc)
461 setValue(LOCATION, loc);
465 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
469 public String getEnaLocation()
471 return (String) getValue(LOCATION);
475 * Readable representation, for debug only, not guaranteed not to change
479 public String toString()
481 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
486 * Overridden to ensure that whenever two objects are equal, they have the
490 public int hashCode()
492 String s = getType() + getDescription() + getFeatureGroup()
493 + getValue("ID") + getValue("Name") + getValue("Parent")
495 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
500 * Answers true if the feature's start/end values represent two related
501 * positions, rather than ends of a range. Such features may be visualised or
502 * reported differently to features on a range.
505 public boolean isContactFeature()
507 // TODO abstract one day to a FeatureType class
508 if ("disulfide bond".equalsIgnoreCase(type)
509 || "disulphide bond".equalsIgnoreCase(type))
517 * Answers true if the sequence has zero start and end position
521 public boolean isNonPositional()
523 return begin == 0 && end == 0;