import java.util.ListIterator;
import java.util.Vector;
-import com.stevesoft.pat.Regex;
-
import fr.orsay.lri.varna.models.rna.RNA;
/**
*/
public class Sequence extends ASequence implements SequenceI
{
- private static final Regex limitrx = new Regex(
- "[/][0-9]{1,}[-][0-9]{1,}$");
-
- private static final Regex endrx = new Regex("[0-9]{1,}$");
-
SequenceI datasetSequence;
String name;
*/
Vector<AlignmentAnnotation> annotation;
- /**
- * The index of the sequence in a MSA
- */
- int index = -1;
-
- private SequenceFeatures sequenceFeatureStore;
+ private SequenceFeaturesI sequenceFeatureStore;
/*
* A cursor holding the approximate current view position to the sequence,
*/
private int changeCount;
- private GeneLoci geneLoci;
-
/**
* Creates a new Sequence object.
*
checkValidRange();
}
+ /**
+ * If 'name' ends in /i-j, where j >= i > 0 are integers, extracts i and j as
+ * start and end respectively and removes the suffix from the name
+ */
void parseId()
{
if (name == null)
"POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
name = "";
}
- // Does sequence have the /start-end signature?
- if (limitrx.search(name))
+ int slashPos = name.lastIndexOf('/');
+ if (slashPos > -1 && slashPos < name.length() - 1)
{
- name = limitrx.left();
- endrx.search(limitrx.stringMatched());
- setStart(Integer.parseInt(limitrx.stringMatched().substring(1,
- endrx.matchedFrom() - 1)));
- setEnd(Integer.parseInt(endrx.stringMatched()));
+ String suffix = name.substring(slashPos + 1);
+ String[] range = suffix.split("-");
+ if (range.length == 2)
+ {
+ try
+ {
+ int from = Integer.valueOf(range[0]);
+ int to = Integer.valueOf(range[1]);
+ if (from > 0 && to >= from)
+ {
+ name = name.substring(0, slashPos);
+ setStart(from);
+ setEnd(to);
+ checkValidRange();
+ }
+ } catch (NumberFormatException e)
+ {
+ // leave name unchanged if suffix is invalid
+ }
+ }
}
}
+ /**
+ * Ensures that 'end' is not before the end of the sequence, that is,
+ * (end-start+1) is at least as long as the count of ungapped positions. Note
+ * that end is permitted to be beyond the end of the sequence data.
+ */
void checkValidRange()
{
// Note: JAL-774 :
int endRes = 0;
for (int j = 0; j < sequence.length; j++)
{
- if (!jalview.util.Comparison.isGap(sequence[j]))
+ if (!Comparison.isGap(sequence[j]))
{
endRes++;
}
}
/**
- * DOCUMENT ME!
+ * Sets the sequence name. If the name ends in /start-end, then the start-end
+ * values are parsed out and set, and the suffix is removed from the name.
+ * The suffix is only honoured (by parseId) when both parts are integers with
+ * start > 0 and end >= start; otherwise a non-null name is kept as given.
*
- * @param name
- * DOCUMENT ME!
+ * @param theName
+ *          the new name, optionally ending in a /start-end suffix
*/
@Override
- public void setName(String name)
+ public void setName(String theName)
{
- this.name = name;
+ this.name = theName;
this.parseId();
}
public void setDescription(String desc)
{
this.description = desc;
- parseDescription();
- }
-
- /**
- * Parses and saves fields of an Ensembl-style description e.g.
- * chromosome:GRCh38:17:45051610:45109016:1
- */
- protected void parseDescription()
- {
- if (description == null)
- {
- return;
- }
- String[] tokens = description.split(":");
- if (tokens.length == 6 && "chromosome".equals(tokens[0])) {
- String ref = tokens[1];
- String chrom = tokens[2];
- try {
- int chStart = Integer.parseInt(tokens[3]);
- int chEnd = Integer.parseInt(tokens[4]);
- boolean forwardStrand = "1".equals(tokens[5]);
- String species = ""; // dunno yet!
- int[] from = new int[] { start, end };
- int[] to = new int[] { forwardStrand ? chStart : chEnd,
- forwardStrand ? chEnd : chStart };
- MapList map = new MapList(from, to, 1, 1);
- GeneLoci gl = new GeneLoci(species, ref, chrom, map);
- setGeneLoci(gl);
- } catch (NumberFormatException e)
- {
- System.err.println("Bad integers in description " + description);
- }
- }
}
/**
* Records gene locus information for this sequence by adding a database
* reference (via addDBRef). The reference is constructed from, in order:
* speciesId, assemblyId, the string DBRefEntry.CHROMOSOME + ":" +
* chromosomeId, and a new Mapping wrapping the supplied map.
*
* @param speciesId
*          passed as the first DBRefEntry constructor argument
* @param assemblyId
*          passed as the second DBRefEntry constructor argument
* @param chromosomeId
*          appended to DBRefEntry.CHROMOSOME and ":" to form the third
*          constructor argument
* @param map
*          wrapped in a Mapping and passed as the fourth constructor argument
*/
- public void setGeneLoci(GeneLoci gl)
+ @Override
+ public void setGeneLoci(String speciesId, String assemblyId,
+ String chromosomeId, MapList map)
{
- geneLoci = gl;
+ addDBRef(new DBRefEntry(speciesId, assemblyId, DBRefEntry.CHROMOSOME
+ + ":" + chromosomeId, new Mapping(map)));
}
/**
* Returns gene loci information derived from the first database reference
* (as returned by getDBRefs()) for which isChromosome() is true, or null if
* there are no database references or none of them is a chromosome
* reference.
* <p>
* The returned object delegates to that reference on each call: the species
* id is the reference's source, the assembly id is its version, the
* chromosome id is its accession id with the first
* (DBRefEntry.CHROMOSOME.length() + 1) characters removed, and the map is
* obtained from the reference's mapping.
* <p>
* NOTE(review): getMap() on the returned object dereferences ref.getMap()
* without a null check - confirm that chromosome references always carry a
* mapping.
*
* @return gene loci backed by the first chromosome reference, or null if
*         there is none
*/
- public GeneLoci getGeneLoci()
+ @Override
+ public GeneLociI getGeneLoci()
{
- return geneLoci;
+ DBRefEntry[] refs = getDBRefs();
+ if (refs != null)
+ {
+ for (final DBRefEntry ref : refs)
+ {
+ if (ref.isChromosome())
+ {
+ return new GeneLociI()
+ {
+ @Override
+ public String getSpeciesId()
+ {
+ return ref.getSource();
+ }
+
+ @Override
+ public String getAssemblyId()
+ {
+ return ref.getVersion();
+ }
+
+ @Override
+ public String getChromosomeId()
+ {
+ // strip off "chromosome:" prefix to chrId
+ return ref.getAccessionId().substring(
+ DBRefEntry.CHROMOSOME.length() + 1);
+ }
+
+ @Override
+ public MapList getMap()
+ {
+ return ref.getMap().getMap();
+ }
+ };
+ }
+ }
+ }
+ return null;
}
/**
}
}
- /**
- * @return The index (zero-based) on this sequence in the MSA. It returns
- * {@code -1} if this information is not available.
- */
- @Override
- public int getIndex()
- {
- return index;
- }
-
- /**
- * Defines the position of this sequence in the MSA. Use the value {@code -1}
- * if this information is undefined.
- *
- * @param The
- * position for this sequence. This value is zero-based (zero for
- * this first sequence)
- */
- @Override
- public void setIndex(int value)
- {
- index = value;
- }
-
@Override
public void setRNA(RNA r)
{
* and we may have included adjacent or enclosing features;
* remove any that are not enclosing, non-contact features
*/
- if (endPos > this.end || Comparison.isGap(sequence[toColumn - 1]))
+ boolean endColumnIsGapped = toColumn > 0 && toColumn <= sequence.length
+ && Comparison.isGap(sequence[toColumn - 1]);
+ if (endPos > this.end || endColumnIsGapped)
{
ListIterator<SequenceFeature> it = result.listIterator();
while (it.hasNext())