2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.datamodel.features.FeatureLocationI;
25 import java.util.Comparator;
26 import java.util.HashMap;
28 import java.util.Map.Entry;
29 import java.util.Vector;
37 public class SequenceFeature implements FeatureLocationI
40 * score value if none is set; preferably Float.Nan, but see
41 * JAL-2060 and JAL-2554 for a couple of blockers to that
43 private static final float NO_SCORE = 0f;
45 private static final String STATUS = "status";
47 private static final String STRAND = "STRAND";
49 // private key for Phase designed not to conflict with real GFF data
50 private static final String PHASE = "!Phase";
52 // private key for ENA location designed not to conflict with real GFF data
53 private static final String LOCATION = "!Location";
56 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
57 * name1=value1;name2=value2,value3;...etc
59 private static final String ATTRIBUTES = "ATTRIBUTES";
62 * type, begin, end, featureGroup, score and contactFeature are final
63 * to ensure that the integrity of SequenceFeatures data store
64 * can't be broken by direct update of these fields
66 public final String type;
68 public final int begin;
72 public final String featureGroup;
74 public final float score;
76 private final boolean contactFeature;
78 public String description;
81 * a map of key-value pairs; may be populated from GFF 'column 9' data,
82 * other data sources (e.g. GenBank file), or programmatically
84 public Map<String, Object> otherDetails;
86 public Vector<String> links;
89 * Constructs a duplicate feature. Note: Uses makes a shallow copy of the
90 * otherDetails map, so the new and original SequenceFeature may reference the
91 * same objects in the map.
95 public SequenceFeature(SequenceFeature cpy)
97 this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(), cpy
110 public SequenceFeature(String theType, String theDesc, int theBegin,
111 int theEnd, String group)
113 this(theType, theDesc, theBegin, theEnd, NO_SCORE, group);
117 * Constructor including a score value
126 public SequenceFeature(String theType, String theDesc, int theBegin,
127 int theEnd, float theScore, String group)
130 this.description = theDesc;
131 this.begin = theBegin;
133 this.featureGroup = group;
134 this.score = theScore;
137 * for now, only "Disulfide/disulphide bond" is treated as a contact feature
139 this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
140 || "disulphide bond".equalsIgnoreCase(type);
144 * A copy constructor that allows the value of final fields to be 'modified'
153 public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
154 int newEnd, String newGroup, float newScore)
156 this(newType, sf.getDescription(), newBegin, newEnd, newScore,
159 if (sf.otherDetails != null)
161 otherDetails = new HashMap<>();
162 for (Entry<String, Object> entry : sf.otherDetails.entrySet())
164 otherDetails.put(entry.getKey(), entry.getValue());
167 if (sf.links != null && sf.links.size() > 0)
169 links = new Vector<>();
170 for (int i = 0, iSize = sf.links.size(); i < iSize; i++)
172 links.addElement(sf.links.elementAt(i));
178 * A copy constructor that allows the value of final fields to be 'modified'
186 public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
187 String newGroup, float newScore)
189 this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore);
193 * Two features are considered equal if they have the same type, group,
194 * description, start, end, phase, strand, and (if present) 'Name', ID' and
195 * 'Parent' attributes.
197 * Note we need to check Parent to distinguish the same exon occurring in
198 * different transcripts (in Ensembl GFF). This allows assembly of transcript
199 * sequences from their component exon regions.
202 public boolean equals(Object o)
204 return equals(o, false);
208 * Overloaded method allows the equality test to optionally ignore the
209 * 'Parent' attribute of a feature. This supports avoiding adding many
210 * superficially duplicate 'exon' or CDS features to genomic or protein
214 * @param ignoreParent
217 public boolean equals(Object o, boolean ignoreParent)
219 if (o == null || !(o instanceof SequenceFeature))
224 SequenceFeature sf = (SequenceFeature) o;
225 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
227 if (begin != sf.begin || end != sf.end || !sameScore)
232 if (getStrand() != sf.getStrand())
237 if (!(type + description + featureGroup + getPhase()).equals(
238 sf.type + sf.description + sf.featureGroup + sf.getPhase()))
242 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
246 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
252 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
261 * Returns true if both values are null, are both non-null and equal
267 protected static boolean equalAttribute(Object att1, Object att2)
269 if (att1 == null && att2 == null)
275 return att1.equals(att2);
277 return att2.equals(att1);
283 * @return DOCUMENT ME!
286 public int getBegin()
294 * @return DOCUMENT ME!
305 * @return DOCUMENT ME!
307 public String getType()
315 * @return DOCUMENT ME!
317 public String getDescription()
322 public void setDescription(String desc)
327 public String getFeatureGroup()
332 public void addLink(String labelLink)
336 links = new Vector<>();
339 if (!links.contains(labelLink))
341 links.insertElementAt(labelLink, 0);
345 public float getScore()
351 * Used for getting values which are not in the basic set. eg STRAND, PHASE
357 public Object getValue(String key)
359 if (otherDetails == null)
365 return otherDetails.get(key);
370 * Returns a property value for the given key if known, else the specified
374 * @param defaultValue
377 public Object getValue(String key, Object defaultValue)
379 Object value = getValue(key);
380 return value == null ? defaultValue : value;
384 * Used for setting values which are not in the basic set. eg STRAND, FRAME
392 public void setValue(String key, Object value)
396 if (otherDetails == null)
398 otherDetails = new HashMap<>();
401 otherDetails.put(key, value);
406 * The following methods are added to maintain the castor Uniprot mapping file
409 public void setStatus(String status)
411 setValue(STATUS, status);
414 public String getStatus()
416 return (String) getValue(STATUS);
419 public void setAttributes(String attr)
421 setValue(ATTRIBUTES, attr);
424 public String getAttributes()
426 return (String) getValue(ATTRIBUTES);
430 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
431 * GFF), and 0 for unknown or not (validly) specified
435 public int getStrand()
438 if (otherDetails != null)
440 Object str = otherDetails.get(STRAND);
445 else if ("+".equals(str))
454 * Set the value of strand
457 * should be "+" for forward, or "-" for reverse
459 public void setStrand(String strand)
461 setValue(STRAND, strand);
464 public void setPhase(String phase)
466 setValue(PHASE, phase);
469 public String getPhase()
471 return (String) getValue(PHASE);
475 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
479 public void setEnaLocation(String loc)
481 setValue(LOCATION, loc);
485 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
489 public String getEnaLocation()
491 return (String) getValue(LOCATION);
495 * Readable representation, for debug only, not guaranteed not to change
499 public String toString()
501 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
506 * Overridden to ensure that whenever two objects are equal, they have the
510 public int hashCode()
512 String s = getType() + getDescription() + getFeatureGroup()
513 + getValue("ID") + getValue("Name") + getValue("Parent")
515 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
520 * Answers true if the feature's start/end values represent two related
521 * positions, rather than ends of a range. Such features may be visualised or
522 * reported differently to features on a range.
525 public boolean isContactFeature()
527 return contactFeature;
531 * Answers true if the sequence has zero start and end position
535 public boolean isNonPositional()
537 return begin == 0 && end == 0;
541 class SFSortByEnd implements Comparator<SequenceFeature>
544 public int compare(SequenceFeature a, SequenceFeature b)
546 return a.getEnd() - b.getEnd();
550 class SFSortByBegin implements Comparator<SequenceFeature>
553 public int compare(SequenceFeature a, SequenceFeature b)
555 return a.getBegin() - b.getBegin();