{
updateDbrefMappings(dna, seq, xrfs, retrieved, cf);
+ SequenceIdMatcher matcher = new SequenceIdMatcher(
+ dataset.getSequences());
+ matcher.addAll(addedPeers);
List<SequenceFeature> copiedFeatures = new ArrayList<SequenceFeature>();
CrossRef me = new CrossRef();
for (int rs = 0; rs < retrieved.length; rs++)
{
if (map.getTo() != null && map.getMap() != null)
{
- // should search the local dataset to find any existing
- // candidates for To !
+ SequenceI matched = matcher
+ .findIdMatch(map.getTo());
+ if (matched != null)
+ {
+ map.setTo(matched);
+ }
+ else
+ {
+ matcher.add(map.getTo());
+ }
try
{
// compare ms with dss and replace with dss in mapping
}
else
{
- addedPeers.add(map.getTo());
+ if (!addedPeers.contains(map.getTo()))
+ {
+ addedPeers.add(map.getTo());
+ }
cf.addMap(retrieved[rs].getDatasetSequence(),
map.getTo(), map.getMap());
}
}
/**
- * add more sequences to this matcher - also used by the constructor
+ * Adds sequences to this matcher
*
* @param seqs
*/
{
for (SequenceI seq : seqs)
{
- // TODO: deal with ID collisions - SequenceI should be appended to list
- // associated with this key.
- names.put(new SeqIdName(seq.getDisplayId(true)), seq);
- SequenceI dbseq = seq;
- while (dbseq.getDatasetSequence() != null)
- {
- dbseq = dbseq.getDatasetSequence();
- }
- // add in any interesting identifiers
- if (dbseq.getDBRefs() != null)
+ add(seq);
+ }
+ }
+
+ /**
+ * Adds one sequence to this matcher
+ *
+ * @param seq
+ */
+ public void add(SequenceI seq)
+ {
+ // TODO: deal with ID collisions - SequenceI should be appended to list
+ // associated with this key.
+ names.put(new SeqIdName(seq.getDisplayId(true)), seq);
+ SequenceI dbseq = seq;
+ while (dbseq.getDatasetSequence() != null)
+ {
+ dbseq = dbseq.getDatasetSequence();
+ }
+ // add in any interesting identifiers
+ if (dbseq.getDBRefs() != null)
+ {
+ DBRefEntry dbr[] = dbseq.getDBRefs();
+ SeqIdName sid = null;
+ for (int r = 0; r < dbr.length; r++)
{
- DBRefEntry dbr[] = dbseq.getDBRefs();
- SeqIdName sid = null;
- for (int r = 0; r < dbr.length; r++)
+ sid = new SeqIdName(dbr[r].getAccessionId());
+ if (!names.containsKey(sid))
{
- sid = new SeqIdName(dbr[r].getAccessionId());
- if (!names.containsKey(sid))
- {
- names.put(sid, seq);
- }
+ names.put(sid, seq);
}
}
}
*/
package jalview.datamodel.xdb.embl;
+import jalview.analysis.SequenceIdMatcher;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.FeatureProperties;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
+import jalview.util.MappingUtils;
+import jalview.util.StringUtils;
import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
import java.util.Map.Entry;
import java.util.Vector;
+import java.util.regex.Pattern;
/**
* Data model for one entry returned from an EMBL query, as marshalled by a
* Castor binding file
*
- * For example: http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml
+ * For example:
+ * http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=ena_sequence&id=J03321
+ * &format=emblxml
*
* @see embl_mapping.xml
*/
public class EmblEntry
{
+ private static final Pattern SPACE_PATTERN = Pattern.compile(" ");
+
String accession;
String version;
this.version = version;
}
- /*
- * EMBL Feature support is limited. The text below is included for the benefit
- * of any developer working on improving EMBL feature import in Jalview.
- * Extract from EMBL feature specification see
- * http://www.embl-ebi.ac.uk/embl/Documentation
- * /FT_definitions/feature_table.html 3.5 Location 3.5.1 Purpose
- *
- * The location indicates the region of the presented sequence which
- * corresponds to a feature.
- *
- * 3.5.2 Format and conventions The location contains at least one sequence
- * location descriptor and may contain one or more operators with one or more
- * sequence location descriptors. Base numbers refer to the numbering in the
- * entry. This numbering designates the first base (5' end) of the presented
- * sequence as base 1. Base locations beyond the range of the presented
- * sequence may not be used in location descriptors, the only exception being
- * location in a remote entry (see 3.5.2.1, e).
- *
- * Location operators and descriptors are discussed in more detail below.
- *
- * 3.5.2.1 Location descriptors
- *
- * The location descriptor can be one of the following: (a) a single base
- * number (b) a site between two indicated adjoining bases (c) a single base
- * chosen from within a specified range of bases (not allowed for new entries)
- * (d) the base numbers delimiting a sequence span (e) a remote entry
- * identifier followed by a local location descriptor (i.e., a-d)
- *
- * A site between two adjoining nucleotides, such as endonucleolytic cleavage
- * site, is indicated by listing the two points separated by a carat (^). The
- * permitted formats for this descriptor are n^n+1 (for example 55^56), or,
- * for circular molecules, n^1, where "n" is the full length of the molecule,
- * ie 1000^1 for circular molecule with length 1000.
- *
- * A single base chosen from a range of bases is indicated by the first base
- * number and the last base number of the range separated by a single period
- * (e.g., '12.21' indicates a single base taken from between the indicated
- * points). From October 2006 the usage of this descriptor is restricted : it
- * is illegal to use "a single base from a range" (c) either on its own or in
- * combination with the "sequence span" (d) descriptor for newly created
- * entries. The existing entries where such descriptors exist are going to be
- * retrofitted.
- *
- * Sequence spans are indicated by the starting base number and the ending
- * base number separated by two periods (e.g., '34..456'). The '<' and '>'
- * symbols may be used with the starting and ending base numbers to indicate
- * that an end point is beyond the specified base number. The starting and
- * ending base positions can be represented as distinct base numbers
- * ('34..456') or a site between two indicated adjoining bases.
- *
- * A location in a remote entry (not the entry to which the feature table
- * belongs) can be specified by giving the accession-number and sequence
- * version of the remote entry, followed by a colon ":", followed by a
- * location descriptor which applies to that entry's sequence (i.e.
- * J12345.1:1..15, see also examples below)
- *
- * 3.5.2.2 Operators
- *
- * The location operator is a prefix that specifies what must be done to the
- * indicated sequence to find or construct the location corresponding to the
- * feature. A list of operators is given below with their definitions and most
- * common format.
- *
- * complement(location) Find the complement of the presented sequence in the
- * span specified by " location" (i.e., read the complement of the presented
- * strand in its 5'-to-3' direction)
- *
- * join(location,location, ... location) The indicated elements should be
- * joined (placed end-to-end) to form one contiguous sequence
- *
- * order(location,location, ... location) The elements can be found in the
- * specified order (5' to 3' direction), but nothing is implied about the
- * reasonableness about joining them
- *
- * Note : location operator "complement" can be used in combination with
- * either " join" or "order" within the same location; combinations of "join"
- * and "order" within the same location (nested operators) are illegal.
- *
- *
- *
- * 3.5.3 Location examples
- *
- * The following is a list of common location descriptors with their meanings:
- *
- * Location Description
- *
- * 467 Points to a single base in the presented sequence
- *
- * 340..565 Points to a continuous range of bases bounded by and including the
- * starting and ending bases
- *
- * <345..500 Indicates that the exact lower boundary point of a feature is
- * unknown. The location begins at some base previous to the first base
- * specified (which need not be contained in the presented sequence) and
- * continues to and includes the ending base
- *
- * <1..888 The feature starts before the first sequenced base and continues to
- * and includes base 888
- *
- * 1..>888 The feature starts at the first sequenced base and continues beyond
- * base 888
- *
- * 102.110 Indicates that the exact location is unknown but that it is one of
- * the bases between bases 102 and 110, inclusive
- *
- * 123^124 Points to a site between bases 123 and 124
- *
- * join(12..78,134..202) Regions 12 to 78 and 134 to 202 should be joined to
- * form one contiguous sequence
- *
- *
- * complement(34..126) Start at the base complementary to 126 and finish at
- * the base complementary to base 34 (the feature is on the strand
- * complementary to the presented strand)
- *
- *
- * complement(join(2691..4571,4918..5163)) Joins regions 2691 to 4571 and 4918
- * to 5163, then complements the joined segments (the feature is on the strand
- * complementary to the presented strand)
- *
- * join(complement(4918..5163),complement(2691..4571)) Complements regions
- * 4918 to 5163 and 2691 to 4571, then joins the complemented segments (the
- * feature is on the strand complementary to the presented strand)
- *
- * J00194.1:100..202 Points to bases 100 to 202, inclusive, in the entry (in
- * this database) with primary accession number 'J00194'
- *
- * join(1..100,J00194.1:100..202) Joins region 1..100 of the existing entry
- * with the region 100..202 of remote entry J00194
- */
/**
* Recover annotated sequences from EMBL file
*
- * @param noNa
- * don't return nucleic acid sequences
* @param sourceDb
- * TODO
- * @param noProtein
- * don't return any translated protein sequences marked in features
- * @return dataset sequences with DBRefs and features - DNA always comes first
+ * @param peptides
+ * a list of protein products found so far (to add to)
+ * @return dna dataset sequence with DBRefs and features
*/
- public jalview.datamodel.SequenceI[] getSequences(boolean noNa,
- boolean noPeptide, String sourceDb)
- { // TODO: ensure emblEntry.getSequences behaves correctly for returning all
- // cases of noNa and noPeptide
- Vector<SequenceI> seqs = new Vector<SequenceI>();
- Sequence dna = null;
- if (!noNa)
+ public SequenceI getSequence(String sourceDb, List<SequenceI> peptides)
+ {
+ SequenceI dna = new Sequence(sourceDb + "|" + accession,
+ sequence.getSequence());
+ dna.setDescription(desc);
+ DBRefEntry retrievedref = new DBRefEntry(sourceDb, version, accession);
+ dna.addDBRef(retrievedref);
+ // add map to indicate the sequence is a valid coordinate frame for the
+ // dbref
+ retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() },
+ new int[] { 1, dna.getLength() }, 1, 1));
+ // TODO: transform EMBL Database refs to canonical form
+ if (dbRefs != null)
{
- // In theory we still need to create this if noNa is set to avoid a null
- // pointer exception
- dna = new Sequence(sourceDb + "|" + accession, sequence.getSequence());
- dna.setDescription(desc);
- DBRefEntry retrievedref = new DBRefEntry(sourceDb, version, accession);
- dna.addDBRef(retrievedref);
- // add map to indicate the sequence is a valid coordinate frame for the
- // dbref
- retrievedref.setMap(new Mapping(null,
- new int[] { 1, dna.getLength() }, new int[] { 1,
- dna.getLength() }, 1, 1));
- // TODO: transform EMBL Database refs to canonical form
- if (dbRefs != null)
+ for (DBRefEntry dbref : dbRefs)
{
- for (DBRefEntry dbref : dbRefs)
- {
- dna.addDBRef(dbref);
- }
+ dna.addDBRef(dbref);
}
}
+
try
{
for (EmblFeature feature : features)
{
- if (!noNa)
+ if (feature.dbRefs != null)
{
- if (feature.dbRefs != null)
+ for (DBRefEntry dbref : feature.dbRefs)
{
- for (DBRefEntry dbref : feature.dbRefs)
- {
- dna.addDBRef(dbref);
- }
+ dna.addDBRef(dbref);
}
}
if (FeatureProperties.isCodingFeature(sourceDb, feature.getName()))
{
- parseCodingFeature(feature, sourceDb, seqs, dna, noPeptide);
- }
- else
- {
- // General feature type.
- // TODO this is just duplicated code ??
- if (!noNa)
- {
- if (feature.dbRefs != null)
- {
- for (DBRefEntry dbref : feature.dbRefs)
- {
- dna.addDBRef(dbref);
- }
- }
- }
+ parseCodingFeature(feature, sourceDb, dna, peptides);
}
}
} catch (Exception e)
System.err.println("Resulted in exception: " + e.getMessage());
e.printStackTrace(System.err);
}
- if (!noNa && dna != null)
- {
- seqs.add(dna);
- }
- SequenceI[] sqs = new SequenceI[seqs.size()];
- for (int i = 0, j = seqs.size(); i < j; i++)
- {
- sqs[i] = seqs.elementAt(i);
- seqs.set(i, null);
- }
- return sqs;
+
+ return dna;
}
/**
- * attempt to extract coding region and product from a feature and properly
- * decorate it with annotations.
+ * Extracts coding region and product from a CDS feature and properly
+ * decorates it with annotations.
*
* @param feature
* coding feature
* @param sourceDb
* source database for the EMBLXML
- * @param seqs
- * place where sequences go
* @param dna
* parent dna sequence for this record
- * @param noPeptide
- * flag for generation of Peptide sequence objects
+ * @param peptides
+ * list of protein product sequences for Embl entry
*/
- private void parseCodingFeature(EmblFeature feature, String sourceDb,
- Vector<SequenceI> seqs, Sequence dna, boolean noPeptide)
+ void parseCodingFeature(EmblFeature feature, String sourceDb,
+ SequenceI dna, List<SequenceI> peptides)
{
boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS);
- // extract coding region(s)
- jalview.datamodel.Mapping map = null;
- int[] exon = null;
- if (feature.locations != null)
- {
- for (EmblFeatureLocations loc : feature.locations)
- {
- int[] se = loc.getElementRanges(accession);
- if (exon == null)
- {
- exon = se;
- }
- else
- {
- int[] t = new int[exon.length + se.length];
- System.arraycopy(exon, 0, t, 0, exon.length);
- System.arraycopy(se, 0, t, exon.length, se.length);
- exon = t;
- }
- }
- }
+
+ int[] exon = getCdsRanges(feature);
+
String prseq = null;
- String prname = new String();
+ String prname = "";
String prid = null;
- Hashtable<String, String> vals = new Hashtable<String, String>();
- int prstart = 1;
- // get qualifiers
+ Map<String, String> vals = new Hashtable<String, String>();
+ SequenceIdMatcher matcher = new SequenceIdMatcher(peptides);
+
+ /*
+ * codon_start 1/2/3 in EMBL corresponds to phase 0/1/2 in CDS
+ * (phase is required for CDS features in GFF3 format)
+ */
+ int codonStart = 1;
+
+ /*
+ * parse qualifiers, saving protein translation, protein id,
+ * codon start position, product (name), and 'other values'
+ */
if (feature.getQualifiers() != null)
{
for (Qualifier q : feature.getQualifiers())
String qname = q.getName();
if (qname.equals("translation"))
{
- StringBuilder prsq = new StringBuilder(q.getValues()[0]);
- int p = prsq.indexOf(" ");
- while (p > -1)
- {
- prsq.deleteCharAt(p);
- p = prsq.indexOf(" ", p);
- }
- prseq = prsq.toString();
- prsq = null;
-
+ // remove all spaces (precompiled String.replaceAll(" ", ""))
+ prseq = SPACE_PATTERN.matcher(q.getValues()[0]).replaceAll("");
}
else if (qname.equals("protein_id"))
{
}
else if (qname.equals("codon_start"))
{
- prstart = Integer.parseInt(q.getValues()[0]);
+ try
+ {
+ codonStart = Integer.parseInt(q.getValues()[0]);
+ } catch (NumberFormatException e)
+ {
+ System.err.println("Invalid codon_start in XML for "
+ + accession + ": " + e.getMessage());
+ }
}
else if (qname.equals("product"))
{
+ // sometimes name is returned e.g. for V00488
prname = q.getValues()[0];
}
else
{
// throw anything else into the additional properties hash
- String[] s = q.getValues();
- StringBuilder sb = new StringBuilder();
- if (s != null)
+ String[] qvals = q.getValues();
+ if (qvals != null)
{
- for (int i = 0; i < s.length; i++)
- {
- sb.append(s[i]);
- sb.append("\n");
- }
+ String commaSeparated = StringUtils.arrayToSeparatorList(qvals,
+ ",");
+ vals.put(qname, commaSeparated);
}
- vals.put(qname, sb.toString());
}
}
}
- Sequence product = null;
+
+ // SequenceI product = null;
DBRefEntry protEMBLCDS = null;
- exon = adjustForPrStart(prstart, exon);
+ exon = MappingUtils.removeStartPositions(codonStart - 1, exon);
boolean noProteinDbref = true;
+ SequenceI product = null;
+ Mapping map = null;
if (prseq != null && prname != null && prid != null)
{
- // extract proteins.
- product = new Sequence(prid, prseq, 1, prseq.length());
- product.setDescription(((prname.length() == 0) ? "Protein Product from "
- + sourceDb
- : prname));
- if (!noPeptide)
+ /*
+ * look for product in peptides list, if not found, add it
+ */
+ product = matcher.findIdMatch(prid);
+ if (product == null)
{
- // Protein is also added to vector of sequences returned
- seqs.add(product);
+ product = new Sequence(prid, prseq, 1, prseq.length());
+ product.setDescription(((prname.length() == 0) ? "Protein Product from "
+ + sourceDb
+ : prname));
+ peptides.add(product);
+ matcher.add(product);
}
+
// we have everything - create the mapping and perhaps the protein
// sequence
if (exon == null || exon.length == 0)
System.err
.println("Implementation Notice: EMBLCDS records not properly supported yet - Making up the CDNA region of this sequence... may be incorrect ("
+ sourceDb + ":" + getAccession() + ")");
- if (prseq.length() * 3 == (1 - prstart + dna.getSequence().length))
+ if (prseq.length() * 3 == (1 - codonStart + dna.getSequence().length))
{
System.err
.println("Not allowing for additional stop codon at end of cDNA fragment... !");
// this might occur for CDS sequences where no features are
// marked.
- exon = new int[] { dna.getStart() + (prstart - 1), dna.getEnd() };
- map = new jalview.datamodel.Mapping(product, exon, new int[] { 1,
- prseq.length() }, 3, 1);
+ exon = new int[] { dna.getStart() + (codonStart - 1), dna.getEnd() };
+ map = new Mapping(product, exon, new int[] { 1, prseq.length() },
+ 3, 1);
}
- if ((prseq.length() + 1) * 3 == (1 - prstart + dna.getSequence().length))
+ if ((prseq.length() + 1) * 3 == (1 - codonStart + dna.getSequence().length))
{
System.err
.println("Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!");
- exon = new int[] { dna.getStart() + (prstart - 1),
+ exon = new int[] { dna.getStart() + (codonStart - 1),
dna.getEnd() - 3 };
- map = new jalview.datamodel.Mapping(product, exon, new int[] { 1,
- prseq.length() }, 3, 1);
+ map = new Mapping(product, exon, new int[] { 1, prseq.length() },
+ 3, 1);
}
}
else
}
else
{
- // final product length trunctation check
-
- map = new jalview.datamodel.Mapping(product,
- adjustForProteinLength(prseq.length(), exon), new int[] {
- 1, prseq.length() }, 3, 1);
+ // final product length truncation check
+ // TODO should from range include stop codon even if not in protein
+ // in order to include stop codon in CDS sequence (as done for
+ // Ensembl)?
+ int[] cdsRanges = adjustForProteinLength(prseq.length(),
+ exon);
+ map = new Mapping(product, cdsRanges, new int[] { 1, prseq.length() }, 3, 1);
// reconstruct the EMBLCDS entry
// TODO: this is only necessary when the codon annotation is
// complete (I think JBPNote)
pcdnaref.setAccessionId(prid);
pcdnaref.setSource(DBRefSource.EMBLCDS);
pcdnaref.setVersion(getVersion()); // same as parent EMBL version.
- jalview.util.MapList mp = new jalview.util.MapList(new int[] { 1,
- prseq.length() }, new int[] { 1 + (prstart - 1),
- (prstart - 1) + 3 * prseq.length() }, 1, 3);
- // { 1 + (prstart - 1) * 3,
- // 1 + (prstart - 1) * 3 + prseq.length() * 3 - 1 }, new int[]
- // { 1prstart, prstart + prseq.length() - 1 }, 3, 1);
+ MapList mp = new MapList(new int[] { 1, prseq.length() },
+ new int[] { 1 + (codonStart - 1),
+ (codonStart - 1) + 3 * prseq.length() }, 1, 3);
pcdnaref.setMap(new Mapping(mp));
if (product != null)
{
protEMBLCDS = new DBRefEntry(pcdnaref);
protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct);
product.addDBRef(protEMBLCDS);
-
}
-
}
}
// add cds feature to dna seq - this may include the stop codon
for (int xint = 0; exon != null && xint < exon.length; xint += 2)
{
- SequenceFeature sf = new SequenceFeature();
- sf.setBegin(exon[xint]);
- sf.setEnd(exon[xint + 1]);
- sf.setType(feature.getName());
+ SequenceFeature sf = makeCdsFeature(exon, xint, prname, prid, vals,
+ codonStart);
+ sf.setType(feature.getName()); // "CDS"
sf.setFeatureGroup(sourceDb);
- sf.setDescription("Exon " + (1 + xint / 2) + " for protein '"
- + prname + "' EMBLCDS:" + prid);
- sf.setValue(FeatureProperties.EXONPOS, new Integer(1 + xint));
- sf.setValue(FeatureProperties.EXONPRODUCT, prname);
- if (vals != null)
- {
- for (Entry<String, String> val : vals.entrySet())
- {
- sf.setValue(val.getKey(), val.getValue());
- }
- }
dna.addSequenceFeature(sf);
}
}
// add dbRefs to sequence
if (feature.dbRefs != null)
{
+ boolean productMapped = false;
for (DBRefEntry ref : feature.dbRefs)
{
- ref.setSource(jalview.util.DBRefUtils.getCanonicalName(ref
- .getSource()));
+ ref.setSource(DBRefUtils.getCanonicalName(ref.getSource()));
// Hard code the kind of protein product accessions that EMBL cite
- if (ref.getSource().equals(jalview.datamodel.DBRefSource.UNIPROT))
+ if (ref.getSource().equals(DBRefSource.UNIPROT))
{
+ String refSeqName = DBRefSource.UNIPROT + "|"
+ + ref.getAccessionId();
ref.setMap(map);
if (map != null && map.getTo() != null)
{
- map.getTo().addDBRef(
- new DBRefEntry(ref.getSource(), ref.getVersion(), ref
- .getAccessionId())); // don't copy map over.
- if (map.getTo().getName().indexOf(prid) == 0)
- {
- map.getTo().setName(
- jalview.datamodel.DBRefSource.UNIPROT + "|"
- + ref.getAccessionId());
- }
+ // if (!productMapped)
+ // {
+ // map.getTo().setName(refSeqName);
+ // map.getTo().addDBRef(
+ // new DBRefEntry(ref.getSource(), ref.getVersion(), ref
+ // .getAccessionId())); // don't copy map over.
+ // // if (map.getTo().getName().startsWith(prid))
+ // productMapped = true;
+ // }
+ // else
+ // {
+ /*
+ * an alternate UNIPROT product for CDS - same mapping
+ * but to a sequence with a different name
+ */
+ SequenceI newSeq = matcher.findIdMatch(refSeqName);
+ if (newSeq == null)
+ {
+ newSeq = new Sequence(refSeqName, map.getTo()
+ .getSequenceAsString());
+ matcher.add(newSeq);
+ peptides.add(newSeq);
+ }
+ Mapping newMap = new Mapping(newSeq, map.getMap());
+ ref.setMap(newMap);
+ // }
}
noProteinDbref = false;
}
}
}
- private int[] adjustForPrStart(int prstart, int[] exon)
+ /**
+ * Helper method to construct a SequenceFeature for one cds range, that is,
+ * for the pair exons[exonStartIndex], exons[exonStartIndex + 1]. The
+ * feature's begin and end are the lower and higher of the two values
+ * respectively; its strand is "+" when the pair is ascending (or equal) and
+ * "-" otherwise. The feature type and feature group are not set by this
+ * method.
+ *
+ * @param exons
+ * array of cds [start, end, ...] positions
+ * @param exonStartIndex
+ * offset into the exons array of the first element of the range;
+ * the 1-based exon number is computed as exonStartIndex / 2 + 1
+ * @param proteinName
+ * product name; used in the description text and stored as the
+ * EXONPRODUCT value
+ * @param proteinAccessionId
+ * protein id; appears in the description text after "EMBLCDS:"
+ * @param vals
+ * map of 'miscellaneous values' for feature; must not be null
+ * (isEmpty() is called on it unguarded). Each entry is stored as a
+ * feature value, and all entries are also joined as key=value pairs
+ * separated by ";" and passed to setAttributes
+ * @param codonStart
+ * codon start position for CDS (1/2/3, normally 1); stored on the
+ * feature as phase codonStart - 1
+ * @return the newly constructed feature
+ */
+ protected SequenceFeature makeCdsFeature(int[] exons, int exonStartIndex,
+ String proteinName, String proteinAccessionId,
+ Map<String, String> vals, int codonStart)
{
-
- int origxon[], sxpos = -1;
- int sxstart, sxstop; // unnecessary variables used for debugging
- // first adjust range for codon start attribute
- if (prstart > 1)
+ int exonNumber = exonStartIndex / 2 + 1; // ranges are [start, end] pairs, numbered from 1
+ SequenceFeature sf = new SequenceFeature();
+ sf.setBegin(Math.min(exons[exonStartIndex], exons[exonStartIndex + 1]));
+ sf.setEnd(Math.max(exons[exonStartIndex], exons[exonStartIndex + 1]));
+ sf.setDescription(String.format(
+ "Exon %d for protein '%s' EMBLCDS:%s", exonNumber, proteinName,
+ proteinAccessionId));
+ sf.setPhase(String.valueOf(codonStart - 1)); // codon_start 1/2/3 becomes phase 0/1/2
+ sf.setStrand(exons[exonStartIndex] <= exons[exonStartIndex + 1] ? "+" : "-");
+ sf.setValue(FeatureProperties.EXONPOS, exonNumber);
+ sf.setValue(FeatureProperties.EXONPRODUCT, proteinName);
+ if (!vals.isEmpty())
{
- origxon = new int[exon.length];
- System.arraycopy(exon, 0, origxon, 0, exon.length);
- int cdspos = 0;
- for (int x = 0; x < exon.length && sxpos == -1; x += 2)
+ StringBuilder sb = new StringBuilder(); // accumulates key=value;key=value...
+ boolean first = true;
+ for (Entry<String, String> val : vals.entrySet())
{
- cdspos += exon[x + 1] - exon[x] + 1;
- if (prstart <= cdspos)
+ if (!first)
{
- sxpos = x;
- sxstart = exon[x];
- sxstop = exon[x + 1];
- // and adjust start boundary of first exon.
- exon[x] = exon[x + 1] - cdspos + prstart;
- break;
+ sb.append(";");
}
+ sb.append(val.getKey()).append("=").append(val.getValue());
+ first = false;
+ sf.setValue(val.getKey(), val.getValue());
}
+ sf.setAttributes(sb.toString());
+ }
+ return sf;
+ }
- if (sxpos > 0)
- {
- int[] nxon = new int[exon.length - sxpos];
- System.arraycopy(exon, sxpos, nxon, 0, exon.length - sxpos);
- exon = nxon;
- }
+ /**
+ * Returns the CDS positions as a list of [start, end, start, end...]
+ * positions. If on the reverse strand, these will be in descending order.
+ * The result is the concatenation, in iteration order, of the ranges that
+ * each of the feature's locations reports for this entry's accession.
+ *
+ * @param feature
+ * the feature whose locations are read; must not be null
+ * @return the combined ranges, or an empty array (never null) if the feature
+ * has no locations
+ */
+ protected int[] getCdsRanges(EmblFeature feature)
+ {
+ if (feature.locations == null)
+ {
+ return new int[] {};
}
- return exon;
+ int cdsBoundaryCount = 0; // count of all start/stop locations
+ int[][] cdsLocations = new int[feature.locations.size()][]; // one ranges array per location
+ int locationNumber = 0;
+ for (EmblFeatureLocations loc : feature.locations)
+ {
+ int[] locationRanges = loc.getElementRanges(accession);
+ cdsLocations[locationNumber++] = locationRanges;
+ cdsBoundaryCount += locationRanges.length;
+ }
+ int[] cdsRanges = new int[cdsBoundaryCount];
+ int copyTo = 0; // next free index in cdsRanges
+ for (int[] ranges : cdsLocations)
+ {
+ System.arraycopy(ranges, 0, cdsRanges, copyTo, ranges.length);
+ copyTo += ranges.length;
+ }
+ return cdsRanges;
+
}
/**
{
int origxon[], sxpos = -1, endxon = 0, cdslength = prlength * 3;
- int sxstart, sxstop; // unnecessary variables used for debugging
// first adjust range for codon start attribute
if (prlength >= 1 && exon != null)
{
int cdspos = 0;
for (int x = 0; x < exon.length && sxpos == -1; x += 2)
{
- cdspos += exon[x + 1] - exon[x] + 1;
+ cdspos += Math.abs(exon[x + 1] - exon[x]) + 1;
if (cdslength <= cdspos)
{
// advanced beyond last codon.
sxpos = x;
- sxstart = exon[x];
- sxstop = exon[x + 1];
if (cdslength != cdspos)
{
System.err
*/
package jalview.datamodel.xdb.embl;
+import jalview.bin.Cache;
+import jalview.util.ArrayUtils;
+
+import java.util.Arrays;
import java.util.Vector;
/**
- * Data model for a <loctaion> child element of a <feature> read
+ * Data model for a <location> child element of a <feature> read
* from an EMBL query reply
*
* @see embl_mapping.xml
+ * @see http://www.insdc.org/files/feature_table.html#3.4.2
*/
public class EmblFeatureLocations
{
}
/**
- * Return all location elements concerning given accession as start-end pairs
- * TODO: pass back complement and 'less than or more than' range information
- * TODO: deal with multiple accessions
+ * Return all location elements concerning given accession as start-end pairs.
+ * If the CDS feature is on the forward strand, then start <= end, if on the
+ * reverse strand then start > end.
*
* @param accession
* the accession string for which locations are requested, or null
* for all locations
- * @return null or int[] { start1, end1, ... }
+ * @return int[] { start1, end1, ... }
*/
-
- public int[] getElementRanges(String accession)
+ int[] getElementRanges(String accession)
{
int sepos = 0;
int[] se = new int[locElements.size() * 2];
- if (locationType.equalsIgnoreCase("single")) // TODO: or "simple" ?
+ if ("single".equalsIgnoreCase(locationType)
+ || "join".equalsIgnoreCase(locationType))
{
for (EmblFeatureLocElement loce : locElements)
{
BasePosition bp[] = loce.getBasePositions();
if (bp.length == 2)
{
- se[sepos++] = Integer.parseInt(bp[0].getPos());
- se[sepos++] = Integer.parseInt(bp[1].getPos());
+ try
+ {
+ int start = Integer.parseInt(bp[0].getPos());
+ int end = Integer.parseInt(bp[1].getPos());
+ se[sepos++] = start;
+ se[sepos++] = end;
+ } catch (NumberFormatException e)
+ {
+ System.err
+ .println("format error in EMBL CDS location basePosition: "
+ + e.getMessage());
+ }
}
- }
- }
- }
- else if (locationType.equalsIgnoreCase("join"))
- {
- for (EmblFeatureLocElement loce : locElements)
- {
- if (accession == null || loce.accession != null
- && accession.equals(loce.accession))
- {
- BasePosition bp[] = loce.getBasePositions();
- if (bp.length == 2)
+ else
{
- se[sepos++] = Integer.parseInt(bp[0].getPos());
- se[sepos++] = Integer.parseInt(bp[1].getPos());
+ System.err
+ .println("format error in EMBL CDS location, basePosition count = "
+ + bp.length);
}
}
}
- return se;
}
else if (locationType != null)
{
- if (jalview.bin.Cache.log != null)
+ if (Cache.log != null)
{
- jalview.bin.Cache.log
- .error("EmbleFeatureLocations.getElementRanges cannot deal with locationType=='"
+ Cache.log
+ .error("EmblFeatureLocations.getElementRanges cannot deal with locationType=='"
+ locationType + "'");
}
else
{
System.err
- .println("EmbleFeatureLocations.getElementRanges cannot deal with locationType=='"
+ .println("EmblFeatureLocations.getElementRanges cannot deal with locationType=='"
+ locationType + "'");
}
}
- // trim range if necessary.
- if (se != null && sepos != se.length)
+
+ if (sepos != se.length)
+ {
+ /*
+ * we failed to parse something - trim off null values
+ */
+ se = Arrays.copyOf(se, sepos);
+ }
+
+ /*
+ * If on the complement, reverse the ranges to [end, start, ...end1, start1].
+ * For an example of a joined complement, see (tRNA feature) CAGL0B00165r on
+ * http://www.ebi.ac.uk/ena/data/view/CR380948&display=xml
+ * http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/CR380948/emblxml
+ */
+ if (locationComplement)
{
- int[] trimmed = new int[sepos];
- System.arraycopy(se, 0, trimmed, 0, sepos);
- se = trimmed;
+ ArrayUtils.reverseIntArray(se);
}
return se;
}
--- /dev/null
+package jalview.util;
+
+public class ArrayUtils
+{
+ /**
+ * Reverse the given array 'in situ': the caller's array is modified and
+ * nothing is returned. A null argument is tolerated and ignored.
+ *
+ * @param arr
+ * the array to reverse; may be null, empty or of odd length
+ */
+ public static void reverseIntArray(int[] arr)
+ {
+ if (arr != null)
+ {
+ /*
+ * swap [k] with [end-k] up to the half way point in the array
+ * if length is odd, the middle entry is left untouched by the excitement
+ */
+ int last = arr.length - 1;
+ for (int k = 0; k < arr.length / 2; k++)
+ {
+ int temp = arr[k];
+ arr[k] = arr[last - k];
+ arr[last - k] = temp;
+ }
+ }
+ }
+}
}
return false;
}
+
+ /**
+ * Removes a specified number of positions from the start of a ranges list.
+ * For example, could be used to adjust cds ranges to allow for an incomplete
+ * start codon. Subranges are removed completely, or their start positions
+ * adjusted, until the required number of positions has been removed from the
+ * range. Reverse strand ranges are supported. The input array is not
+ * modified.
+ * <p>
+ * Special cases, as implemented: if removeCount is zero or negative, the
+ * input array itself (not a copy) is returned. If removeCount is greater
+ * than or equal to the total number of positions in all ranges, no
+ * sub-range satisfies the stop condition and an unchanged copy of the input
+ * is returned (NOTE(review): nothing is removed in that case - confirm this
+ * is intended).
+ *
+ * @param removeCount
+ * the number of positions to remove from the start
+ * @param ranges
+ * an array of [start, end, start, end...] positions; when
+ * removeCount is positive it must be non-null, and elements are
+ * read in pairs
+ * @return a new array with the first removeCount positions removed
+ */
+ public static int[] removeStartPositions(int removeCount,
+ final int[] ranges)
+ {
+ if (removeCount <= 0)
+ {
+ return ranges;
+ }
+
+ int[] copy = Arrays.copyOf(ranges, ranges.length);
+ int sxpos = -1;
+ int cdspos = 0;
+ for (int x = 0; x < copy.length && sxpos == -1; x += 2)
+ {
+ // add this sub-range's length; abs() makes it independent of strand
+ cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
+ if (removeCount < cdspos)
+ {
+ /*
+ * we have removed enough, time to finish
+ */
+ sxpos = x;
+
+ /*
+ * increment start of first exon, or decrement if reverse strand
+ */
+ if (copy[x] <= copy[x + 1])
+ {
+ copy[x] = copy[x + 1] - cdspos + removeCount + 1;
+ }
+ else
+ {
+ copy[x] = copy[x + 1] + cdspos - removeCount - 1;
+ }
+ break;
+ }
+ }
+
+ if (sxpos > 0)
+ {
+ /*
+ * we dropped at least one entire sub-range - compact the array
+ */
+ int[] nxon = new int[copy.length - sxpos];
+ System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);
+ return nxon;
+ }
+ return copy;
+ }
}
import jalview.ws.ebi.EBIFetchClient;
import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
public abstract class EmblXmlSource extends EbiFileRetrievedProxy
{
-
- /**
- * Last properly parsed embl file.
+ /*
+ * JAL-1856 Embl returns this text for query not found
*/
- public EmblFile efile = null;
+ private static final String EMBL_NOT_FOUND_REPLY = "ERROR 12 No entries found.";
public EmblXmlSource()
{
public AlignmentI getEmblSequenceRecords(String emprefx, String query,
File reply) throws Exception
{
- SequenceI seqs[] = null;
- StringBuffer result = new StringBuffer();
+ EmblFile efile = null;
+ List<SequenceI> seqs = new ArrayList<SequenceI>();
+
if (reply != null && reply.exists())
{
- efile = null;
file = reply.getAbsolutePath();
- if (reply.length() > 25)
+ if (reply.length() > EMBL_NOT_FOUND_REPLY.length())
{
efile = EmblFile.getEmblFile(reply);
}
- else
- {
- result.append(MessageManager.formatMessage(
- "label.no_embl_record_found",
- new String[] { emprefx.toLowerCase(), query.trim() }));
- }
}
+
+ List<SequenceI> peptides = new ArrayList<SequenceI>();
if (efile != null)
{
for (EmblEntry entry : efile.getEntries())
{
- SequenceI[] seqparts = entry.getSequences(false, true, emprefx);
- // TODO: use !fetchNa,!fetchPeptide here instead - see todo in EmblEntry
- if (seqparts != null)
+ SequenceI seq = entry.getSequence(emprefx, peptides);
+ if (seq != null)
{
- SequenceI[] newseqs = null;
- int si = 0;
- if (seqs == null)
- {
- newseqs = new SequenceI[seqparts.length];
- }
- else
- {
- newseqs = new SequenceI[seqs.length + seqparts.length];
-
- for (; si < seqs.length; si++)
- {
- newseqs[si] = seqs[si];
- seqs[si] = null;
- }
- }
- for (int j = 0; j < seqparts.length; si++, j++)
- {
- newseqs[si] = seqparts[j].deriveSequence();
- // place DBReferences on dataset and refer
- }
- seqs = newseqs;
-
+ seqs.add(seq.deriveSequence());
+ // place DBReferences on dataset and refer
}
}
}
- else
- {
- result = null;
- }
+
AlignmentI al = null;
- if (seqs != null && seqs.length > 0)
+ if (!seqs.isEmpty())
{
- al = new Alignment(seqs);
- result.append(MessageManager.formatMessage(
- "label.embl_successfully_parsed", new String[] { emprefx }));
- results = result;
+ al = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
}
stopQuery();
return al;
--- /dev/null
+package jalview.datamodel.xdb.embl;
+
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertSame;
+
+import jalview.util.MappingUtils;
+
+import java.util.Arrays;
+import java.util.Vector;
+
+import org.testng.annotations.Test;
+
+public class EmblEntryTest
+{
+  /**
+   * Tests getCdsRanges for a CDS feature holding a single range, a
+   * complemented single range, a join, and a complemented join. In the
+   * expected result each complemented range is listed end-first, and the
+   * sub-ranges of the complemented join appear in reverse order.
+   */
+  @Test(groups = "Functional")
+  public void testGetCdsRanges()
+  {
+    EmblEntry testee = new EmblEntry();
+
+    /*
+     * Make a (CDS) Feature with 4 locations
+     */
+    EmblFeature cds = new EmblFeature();
+    Vector<EmblFeatureLocations> locs = new Vector<EmblFeatureLocations>();
+    cds.setLocations(locs);
+
+    /*
+     * single range [10-20]
+     */
+    locs.add(makeLocation("single", false,
+            makeLocElement(null, "10", "20")));
+
+    /*
+     * complement range [30-40]
+     */
+    locs.add(makeLocation("single", true,
+            makeLocElement(null, "30", "40")));
+
+    /*
+     * join range [50-60], [70-80]
+     */
+    locs.add(makeLocation("join", false,
+            makeLocElement(null, "50", "60"),
+            makeLocElement(null, "70", "80")));
+
+    /*
+     * complement range [90-100], [110-120]
+     * this should be the same as complement(join(90..100,110..120))
+     * which is "join 90-100 and 110-120, then complement"
+     */
+    locs.add(makeLocation("join", true,
+            makeLocElement(null, "90", "100"),
+            makeLocElement(null, "110", "120")));
+
+    int[] exons = testee.getCdsRanges(cds);
+    assertEquals("[10, 20, 40, 30, 50, 60, 70, 80, 120, 110, 100, 90]",
+            Arrays.toString(exons));
+  }
+
+  /**
+   * Tests that getCdsRanges leaves out location elements that do not have
+   * exactly two base positions, while still returning the well-formed ranges
+   * on either side of them.
+   */
+  @Test(groups = "Functional")
+  public void testGetCdsRanges_badData()
+  {
+    EmblEntry testee = new EmblEntry();
+
+    /*
+     * Make a (CDS) Feature with 4 locations
+     */
+    EmblFeature cds = new EmblFeature();
+    Vector<EmblFeatureLocations> locs = new Vector<EmblFeatureLocations>();
+    cds.setLocations(locs);
+
+    /*
+     * single range [10-20]
+     */
+    locs.add(makeLocation("single", false,
+            makeLocElement(null, "10", "20")));
+
+    /*
+     * single range with missing end position - should be skipped
+     */
+    locs.add(makeLocation("single", false, makeLocElement(null, "30")));
+
+    /*
+     * single range with extra base position - should be skipped
+     */
+    locs.add(makeLocation("single", false,
+            makeLocElement(null, "30", "30", "30")));
+
+    /*
+     * single valid range [50-60] to finish
+     */
+    locs.add(makeLocation("single", false,
+            makeLocElement(null, "50", "60")));
+
+    int[] exons = testee.getCdsRanges(cds);
+    assertEquals("[10, 20, 50, 60]", Arrays.toString(exons));
+  }
+
+  /**
+   * Test retrieval of exon locations matching an accession id. The entry is
+   * given an accession, and only the location elements carrying that same
+   * accession are expected in the result.
+   */
+  @Test(groups = "Functional")
+  public void testGetCdsRanges_forAccession()
+  {
+    EmblEntry testee = new EmblEntry();
+    String accession = "A1234";
+    testee.setAccession(accession);
+
+    /*
+     * Make a (CDS) Feature with 4 locations
+     */
+    EmblFeature cds = new EmblFeature();
+    Vector<EmblFeatureLocations> locs = new Vector<EmblFeatureLocations>();
+    cds.setLocations(locs);
+
+    /*
+     * single range [10-20] for 'this' accession
+     */
+    locs.add(makeLocation("single", false,
+            makeLocElement(accession, "10", "20")));
+
+    /*
+     * complement range [30-40] - no accession
+     */
+    locs.add(makeLocation("single", true,
+            makeLocElement(null, "30", "40")));
+
+    /*
+     * join range [50-60] this accession, [70-80] another
+     */
+    locs.add(makeLocation("join", false,
+            makeLocElement(accession, "50", "60"),
+            makeLocElement("notme", "70", "80")));
+
+    /*
+     * complement range [90-100] wrong accession, [110-120] good
+     * this should be the same as complement(join(90..100,110..120))
+     * which is "join 90-100 and 110-120, then complement"
+     */
+    locs.add(makeLocation("join", true,
+            makeLocElement("wrong", "90", "100"),
+            makeLocElement(accession, "110", "120")));
+
+    /*
+     * verify we pick out only ranges for A1234
+     */
+    int[] exons = testee.getCdsRanges(cds);
+    assertEquals("[10, 20, 50, 60, 120, 110]",
+            Arrays.toString(exons));
+  }
+
+  /**
+   * Builds a location element holding one base position per supplied value.
+   * 
+   * @param accession
+   *          the accession to set on the element, or null to leave it unset
+   * @param positions
+   *          the base position values, in order
+   * @return the new element
+   */
+  private static EmblFeatureLocElement makeLocElement(String accession,
+          String... positions)
+  {
+    EmblFeatureLocElement locElement = new EmblFeatureLocElement();
+    if (accession != null)
+    {
+      locElement.setAccession(accession);
+    }
+    BasePosition[] basePositions = new BasePosition[positions.length];
+    for (int i = 0; i < positions.length; i++)
+    {
+      basePositions[i] = new BasePosition();
+      basePositions[i].setPos(positions[i]);
+    }
+    locElement.setBasePositions(basePositions);
+    return locElement;
+  }
+
+  /**
+   * Builds a feature location of the given type and strand from its elements.
+   * 
+   * @param locationType
+   *          the location type, "single" or "join" in these tests
+   * @param complement
+   *          the value for the location's complement flag
+   * @param locElements
+   *          the elements of the location, in order
+   * @return the new location
+   */
+  private static EmblFeatureLocations makeLocation(String locationType,
+          boolean complement, EmblFeatureLocElement... locElements)
+  {
+    EmblFeatureLocations loc = new EmblFeatureLocations();
+    loc.setLocationType(locationType);
+    loc.setLocationComplement(complement);
+    Vector<EmblFeatureLocElement> elements = new Vector<EmblFeatureLocElement>();
+    for (EmblFeatureLocElement locElement : locElements)
+    {
+      elements.add(locElement);
+    }
+    loc.setLocElements(elements);
+    return loc;
+  }
+}
--- /dev/null
+package jalview.util;
+
+import static org.testng.AssertJUnit.assertEquals;
+
+import java.util.Arrays;
+
+import org.testng.annotations.Test;
+
+public class ArrayUtilsTest
+{
+  /**
+   * Tests in-place reversal of an int array, including the null, empty and
+   * single element boundary cases, and both even and odd lengths (for an odd
+   * length the middle entry stays where it is).
+   */
+  @Test(groups = "Functional")
+  public void testReverseIntArray()
+  {
+    // null value: should be no exception
+    ArrayUtils.reverseIntArray((int[]) null);
+
+    // empty array: should be no exception, and the array stays empty
+    int[] arr = new int[] {};
+    ArrayUtils.reverseIntArray(arr);
+    assertEquals("[]", Arrays.toString(arr));
+
+    // single element array: nothing to swap
+    arr = new int[] { 7 };
+    ArrayUtils.reverseIntArray(arr);
+    assertEquals("[7]", Arrays.toString(arr));
+
+    // even length array
+    arr = new int[] { 1, 2, 3, 4 };
+    ArrayUtils.reverseIntArray(arr);
+    assertEquals("[4, 3, 2, 1]", Arrays.toString(arr));
+
+    // odd length array
+    arr = new int[] { 1, 2, 3, 4, 5 };
+    ArrayUtils.reverseIntArray(arr);
+    assertEquals("[5, 4, 3, 2, 1]", Arrays.toString(arr));
+  }
+}
import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
-import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
import jalview.api.AlignViewportI;
import jalview.commands.EditCommand;
assertFalse(MappingUtils.contains(ranges, -45));
}
+  /**
+   * Checks MappingUtils.removeStartPositions on forward strand ranges: the
+   * returned ranges have the requested number of leading positions dropped,
+   * and the array passed in is left as it was.
+   */
+  @Test(groups = "Functional")
+  public void testRemoveStartPositions()
+  {
+    // removing zero positions gives back the same range
+    int[] input = new int[] { 1, 10 };
+    int[] trimmed = MappingUtils.removeStartPositions(0, input);
+    assertEquals("[1, 10]", Arrays.toString(trimmed));
+
+    // removing one position moves the start along by one
+    trimmed = MappingUtils.removeStartPositions(1, input);
+    assertEquals("[2, 10]", Arrays.toString(trimmed));
+    assertEquals("[1, 10]", Arrays.toString(input));
+
+    // and again, this time starting from the previous result
+    input = trimmed;
+    trimmed = MappingUtils.removeStartPositions(1, input);
+    assertEquals("[3, 10]", Arrays.toString(trimmed));
+    assertEquals("[2, 10]", Arrays.toString(input));
+
+    // two subranges: first one shortened but not used up
+    input = new int[] { 2, 3, 10, 12 };
+    trimmed = MappingUtils.removeStartPositions(1, input);
+    assertEquals("[3, 3, 10, 12]", Arrays.toString(trimmed));
+    assertEquals("[2, 3, 10, 12]", Arrays.toString(input));
+
+    // first subrange (length 1) is used up exactly and so is dropped
+    input = new int[] { 2, 2, 8, 12 };
+    trimmed = MappingUtils.removeStartPositions(1, input);
+    assertEquals("[8, 12]", Arrays.toString(trimmed));
+    assertEquals("[2, 2, 8, 12]", Arrays.toString(input));
+
+    // first subrange dropped, and one position taken off the second
+    input = new int[] { 2, 2, 8, 12 };
+    trimmed = MappingUtils.removeStartPositions(2, input);
+    assertEquals("[9, 12]", Arrays.toString(trimmed));
+    assertEquals("[2, 2, 8, 12]", Arrays.toString(input));
+
+    // three subranges: only the first is dropped
+    input = new int[] { 2, 2, 4, 4, 9, 12 };
+    trimmed = MappingUtils.removeStartPositions(1, input);
+    assertEquals("[4, 4, 9, 12]", Arrays.toString(trimmed));
+    assertEquals("[2, 2, 4, 4, 9, 12]", Arrays.toString(input));
+
+    // three subranges: the first two are dropped
+    input = new int[] { 2, 2, 4, 4, 9, 12 };
+    trimmed = MappingUtils.removeStartPositions(2, input);
+    assertEquals("[9, 12]", Arrays.toString(trimmed));
+    assertEquals("[2, 2, 4, 4, 9, 12]", Arrays.toString(input));
+
+    // removal runs past the first subrange (length 2) into the second
+    input = new int[] { 2, 3, 9, 12 };
+    trimmed = MappingUtils.removeStartPositions(3, input);
+    assertEquals("[10, 12]", Arrays.toString(trimmed));
+    assertEquals("[2, 3, 9, 12]", Arrays.toString(input));
+  }
+
+  /**
+   * Checks MappingUtils.removeStartPositions on reverse strand ranges (start
+   * greater than end): leading positions are dropped by moving the start
+   * downwards, and the array passed in is left as it was.
+   */
+  @Test(groups = "Functional")
+  public void testRemoveStartPositions_reverseStrand()
+  {
+    // removing zero positions gives back the same range
+    int[] input = new int[] { 10, 1 };
+    int[] trimmed = MappingUtils.removeStartPositions(0, input);
+    assertEquals("[10, 1]", Arrays.toString(trimmed));
+    assertEquals("[10, 1]", Arrays.toString(input));
+
+    // removing one position moves the start down by one
+    input = trimmed;
+    trimmed = MappingUtils.removeStartPositions(1, input);
+    assertEquals("[9, 1]", Arrays.toString(trimmed));
+    assertEquals("[10, 1]", Arrays.toString(input));
+
+    // and again, this time starting from the previous result
+    input = trimmed;
+    trimmed = MappingUtils.removeStartPositions(1, input);
+    assertEquals("[8, 1]", Arrays.toString(trimmed));
+    assertEquals("[9, 1]", Arrays.toString(input));
+
+    // two subranges: first one shortened but not used up
+    input = new int[] { 12, 11, 9, 6 };
+    trimmed = MappingUtils.removeStartPositions(1, input);
+    assertEquals("[11, 11, 9, 6]", Arrays.toString(trimmed));
+    assertEquals("[12, 11, 9, 6]", Arrays.toString(input));
+
+    // first subrange (length 1) is used up exactly and so is dropped
+    input = new int[] { 12, 12, 8, 4 };
+    trimmed = MappingUtils.removeStartPositions(1, input);
+    assertEquals("[8, 4]", Arrays.toString(trimmed));
+    assertEquals("[12, 12, 8, 4]", Arrays.toString(input));
+
+    // first subrange dropped, and one position taken off the second
+    input = new int[] { 12, 12, 8, 4 };
+    trimmed = MappingUtils.removeStartPositions(2, input);
+    assertEquals("[7, 4]", Arrays.toString(trimmed));
+    assertEquals("[12, 12, 8, 4]", Arrays.toString(input));
+
+    // three subranges: only the first is dropped
+    input = new int[] { 12, 12, 10, 10, 8, 4 };
+    trimmed = MappingUtils.removeStartPositions(1, input);
+    assertEquals("[10, 10, 8, 4]", Arrays.toString(trimmed));
+    assertEquals("[12, 12, 10, 10, 8, 4]", Arrays.toString(input));
+
+    // three subranges: the first two are dropped
+    input = new int[] { 12, 12, 10, 10, 8, 4 };
+    trimmed = MappingUtils.removeStartPositions(2, input);
+    assertEquals("[8, 4]", Arrays.toString(trimmed));
+    assertEquals("[12, 12, 10, 10, 8, 4]", Arrays.toString(input));
+
+    // removal runs past the first subrange (length 2) into the second
+    input = new int[] { 12, 11, 8, 4 };
+    trimmed = MappingUtils.removeStartPositions(3, input);
+    assertEquals("[7, 4]", Arrays.toString(trimmed));
+    assertEquals("[12, 11, 8, 4]", Arrays.toString(input));
+  }
+
}