import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE;
-import jalview.api.DBRefEntryI;
import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
* didn't find mapped CDS sequence - construct it and add
* its dataset sequence to the dataset
*/
- cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping);
- SequenceI cdsSeqDss = cdsSeq.createDatasetSequence();
+ cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping,
+ dataset).deriveSequence();
+ // cdsSeq has a name constructed as CDS|<dbref>
+ // <dbref> will be either the accession for the coding sequence,
+ // marked in the /via/ dbref to the protein product accession
+ // or it will be the original nucleotide accession.
+ SequenceI cdsSeqDss = cdsSeq.getDatasetSequence();
+
cdsSeqs.add(cdsSeq);
+
if (!dataset.getSequences().contains(cdsSeqDss))
{
+ // check if this sequence is a newly created one
+ // so needs adding to the dataset
dataset.addSequence(cdsSeqDss);
}
MapList cdsToProteinMap = new MapList(cdsRange, mapList.getToRanges(),
mapList.getFromRatio(), mapList.getToRatio());
AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
- cdsToProteinMapping.addMap(cdsSeq, proteinProduct, cdsToProteinMap);
+ cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
+ cdsToProteinMap);
/*
* guard against duplicating the mapping if repeating this action
mappings.add(cdsToProteinMapping);
}
- /*
- * copy protein's dbrefs to CDS sequence
- * this enables Get Cross-References from CDS alignment
- */
- DBRefEntry[] proteinRefs = DBRefUtils.selectDbRefs(false,
- proteinProduct.getDBRefs());
- if (proteinRefs != null)
- {
- for (DBRefEntry ref : proteinRefs)
- {
- DBRefEntry cdsToProteinRef = new DBRefEntry(ref);
- cdsToProteinRef.setMap(new Mapping(proteinProduct,
- cdsToProteinMap));
- cdsSeqDss.addDBRef(cdsToProteinRef);
- }
- }
-
+ propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),
+ proteinProduct, aMapping);
/*
* add another mapping from original 'from' range to CDS
*/
MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),
cdsRange, 1,
1);
- dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeq,
+ dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,
dnaToCdsMap);
if (!mappings.contains(dnaToCdsMapping))
{
* same source and accession, so need a different accession for
* the CDS from the dna sequence
*/
- DBRefEntryI dnaRef = dnaDss.getSourceDBRef();
- if (dnaRef != null)
+
+ // specific use case:
+ // Genomic contig ENSCHR:1, contains coding regions for ENSG01,
+ // ENSG02, ENSG03, with transcripts and products similarly named.
+ // cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01
+
+ // JBPNote: ?? can't actually create an example that demonstrates we
+ // need to
+ // synthesize an xref.
+
+ for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs())
{
+ // creates a complementary cross-reference to the source sequence's
+ // primary reference.
+
+ DBRefEntry cdsCrossRef = new DBRefEntry(primRef.getSource(),
+ primRef.getSource() + ":" + primRef.getVersion(),
+ primRef.getAccessionId());
+ cdsCrossRef
+ .setMap(new Mapping(dnaDss, new MapList(dnaToCdsMap)));
+ cdsSeqDss.addDBRef(cdsCrossRef);
+
+ // problem here is that the cross-reference is synthesized -
+ // cdsSeq.getName() may be like 'CDS|dnaaccession' or
+ // 'CDS|emblcdsacc'
// assuming cds version same as dna ?!?
- DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(),
- dnaRef.getVersion(), cdsSeq.getName());
+
+ DBRefEntry proteinToCdsRef = new DBRefEntry(
+ primRef.getSource(), primRef.getVersion(),
+ cdsSeq.getName());
+ //
proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
.getInverse()));
proteinProduct.addDBRef(proteinToCdsRef);
*
* @param seq
* @param mapping
+ * @param dataset
+ * - existing dataset. We check for sequences that look like the CDS
+ * we are about to construct, if one exists already, then we will
+ * just return that one.
* @return CDS sequence (as a dataset sequence)
*/
- static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping)
+ static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping,
+ AlignmentI dataset)
{
char[] seqChars = seq.getSequence();
List<int[]> fromRanges = mapping.getMap().getFromRanges();
}
}
}
-
+
/*
* assign 'from id' held in the mapping if set (e.g. EMBL protein_id),
* else generate a sequence name
String mapFromId = mapping.getMappedFromId();
String seqId = "CDS|" + (mapFromId != null ? mapFromId : seq.getName());
SequenceI newSeq = new Sequence(seqId, newSeqChars, 1, newPos);
+ if (dataset != null)
+ {
+ SequenceI[] matches = dataset.findSequenceMatch(newSeq.getName());
+ if (matches != null)
+ {
+ boolean matched = false;
+ for (SequenceI mtch : matches)
+ {
+ if (mtch.getStart() != newSeq.getStart())
+ {
+ continue;
+ }
+ if (mtch.getEnd() != newSeq.getEnd())
+ {
+ continue;
+ }
+ if (!Arrays.equals(mtch.getSequence(), newSeq.getSequence()))
+ {
+ continue;
+ }
+ if (!matched)
+ {
+ matched = true;
+ newSeq = mtch;
+ }
+ else
+ {
+ System.err
+ .println("JAL-2154 regression: warning - found (and ignnored a duplicate CDS sequence):"
+ + mtch.toString());
+ }
+ }
+ }
+ }
// newSeq.setDescription(mapFromId);
return newSeq;
}
/**
+   * Copies onto cdsSeq those database references of contig whose map is a
+   * triplet map equal to the map of the given mapping. Each copy keeps the
+   * source, version and accession of the contig's reference, but is given a
+   * new 3:1 map running from the whole of cdsSeq (start to end) to the 'to'
+   * ranges of the contig reference's map.
+   * 
+   * @param cdsSeq
+   *          sequence that receives the new references
+   * @param contig
+   *          sequence whose mapped database references are examined
+   * @param proteinProduct
+   *          set as the target of a new reference's mapping when the contig
+   *          reference's mapping has no target sequence and proteinProduct
+   *          holds a reference with the same accession and source
+   * @param mapping
+   *          the mapping whose map the contig references are compared with
+   * @return list of DBRefEntrys added.
+   */
+  public static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
+          SequenceI contig, SequenceI proteinProduct, Mapping mapping)
+  {
+    // gather the contig's references whose coding map equals the given one,
+    // remembering which sources they came from
+    List<DBRefEntry> congruentRefs = new ArrayList<DBRefEntry>();
+    HashSet<String> congruentSources = new HashSet<String>();
+    DBRefEntry[] contigRefs = contig.getDBRefs();
+    if (contigRefs != null)
+    {
+      for (DBRefEntry contigRef : contigRefs)
+      {
+        if (!contigRef.hasMap())
+        {
+          continue;
+        }
+        MapList contigMap = contigRef.getMap().getMap();
+        if (contigMap.isTripletMap() && mapping.getMap().equals(contigMap))
+        {
+          congruentRefs.add(contigRef);
+          congruentSources.add(contigRef.getSource());
+        }
+      }
+    }
+
+    // the protein product's references, restricted to those same sources
+    DBRefEntry[] onSource = DBRefUtils.selectRefs(
+            proteinProduct.getDBRefs(),
+            congruentSources.toArray(new String[0]));
+    List<DBRefEntry> propagated = new ArrayList<DBRefEntry>();
+
+    for (DBRefEntry contigRef : congruentRefs)
+    {
+      // map from the full extent of the CDS to the reference's 'to' ranges
+      int[][] cdsExtent = { { cdsSeq.getStart(), cdsSeq.getEnd() } };
+      MapList cdsPosMap = new MapList(Arrays.asList(cdsExtent), contigRef
+              .getMap().getMap().getToRanges(), 3, 1);
+      Mapping contigMapping = new Mapping(contigRef.getMap().getTo(),
+              contigRef.getMap().getMap());
+      SequenceI mappedTo = contigMapping.getTo();
+
+      DBRefEntry newRef = new DBRefEntry(contigRef.getSource(),
+              contigRef.getVersion(), contigRef.getAccessionId(),
+              new Mapping(mappedTo, cdsPosMap));
+
+      // no explicit target sequence: see whether the protein product carries
+      // a reference with the same accession and source, and if so map to it
+      if (mappedTo == null && onSource != null)
+      {
+        List<DBRefEntry> sameAccession = DBRefUtils.searchRefs(onSource,
+                contigRef.getAccessionId());
+        if (sameAccession != null)
+        {
+          for (DBRefEntry proteinRef : sameAccession)
+          {
+            if (proteinRef.getSource().equalsIgnoreCase(
+                    contigRef.getSource()))
+            {
+              newRef.getMap().setTo(proteinProduct);
+            }
+          }
+        }
+      }
+      cdsSeq.addDBRef(newRef);
+      propagated.add(newRef);
+    }
+    return propagated;
+  }
+
+ /**
* Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the
* feature start/end ranges, optionally omitting specified feature types.
* Returns the number of features copied.
{
AlignmentI copy = new Alignment(new Alignment(seqs));
copy.setDataset(dataset);
-
+ boolean isProtein = !copy.isNucleotide();
SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);
if (xrefs != null)
{
{
for (DBRefEntry dbref : dbrefs)
{
- if (dbref.getMap() == null || dbref.getMap().getTo() == null)
+ if (dbref.getMap() == null || dbref.getMap().getTo() == null
+ || dbref.getMap().getTo().isProtein() != isProtein)
{
continue;
}
boolean found = false;
DBRefEntry[] xrfs = DBRefUtils
.selectDbRefs(!fromDna, dss.getDBRefs());
+ // ENST & ENSP comes in to both Protein and nucleotide, so we need to
+ // filter them
+ // out later.
if ((xrfs == null || xrfs.length == 0) && dataset != null)
{
/*
List<DBRefEntry> sourceRefs = DBRefUtils.searchRefsForSource(xrfs,
source);
Iterator<DBRefEntry> refIterator = sourceRefs.iterator();
+ // At this point, if we are retrieving Ensembl, we still don't filter out
+ // ENST when looking for protein crossrefs.
while (refIterator.hasNext())
{
DBRefEntry xref = refIterator.next();
found = false;
- if (xref.hasMap())
+ // we're only interested in coding cross-references, not
+ // locus->transcript
+ if (xref.hasMap() && xref.getMap().getMap().isTripletMap())
{
SequenceI mappedTo = xref.getMap().getTo();
if (mappedTo != null)
* but findInDataset() matches ENSP when looking for Uniprot...
*/
SequenceI matchInDataset = findInDataset(xref);
+ if (matchInDataset != null && xref.getMap().getTo() != null
+ && matchInDataset != xref.getMap().getTo())
+ {
+ System.err
+ .println("Implementation problem (reopen JAL-2154): CrossRef.findInDataset seems to have recovered a different sequence than the one explicitly mapped for xref."
+ + "Found:"
+ + matchInDataset
+ + "\nExpected:"
+ + xref.getMap().getTo()
+ + "\nFor xref:"
+ + xref);
+ }
/*matcher.findIdMatch(mappedTo);*/
if (matchInDataset != null)
{
if (!rseqs.contains(matchInDataset))
{
rseqs.add(matchInDataset);
+ // need to try harder to only add unique mappings
+ if (xref.getMap().getMap().isTripletMap()
+ && dataset.getMapping(seq, matchInDataset) == null
+ && cf.getMappingBetween(seq, matchInDataset) == null)
+ {
+ // materialise a mapping for highlighting between these sequences
+ if (fromDna)
+ {
+ cf.addMap(dss, matchInDataset, xref.getMap().getMap(), xref.getMap().getMappedFromId());
+ } else {
+ cf.addMap(matchInDataset, dss, xref.getMap().getMap().getInverse(), xref.getMap().getMappedFromId());
+ }
+ }
}
refIterator.remove();
continue;
}
+ // TODO: need to determine if this should be a deriveSequence
SequenceI rsq = new Sequence(mappedTo);
rseqs.add(rsq);
- if (xref.getMap().getMap().getFromRatio() != xref.getMap()
- .getMap().getToRatio())
+ if (xref.getMap().getMap().isTripletMap())
{
// get sense of map correct for adding to product alignment.
if (fromDna)
{
SequenceI matchedSeq = matcher.findIdMatch(xref.getSource() + "|"
+ xref.getAccessionId());
- if (matchedSeq != null)
+ // if there was a match, check it's at least the right type of
+ // molecule!
+ if (matchedSeq != null && matchedSeq.isProtein() == fromDna)
{
if (constructMapping(seq, matchedSeq, xref, cf, fromDna))
{
SequenceI[] retrieved = null;
SequenceI dss = seq.getDatasetSequence() == null ? seq : seq
.getDatasetSequence();
+ // first filter in case we are retrieving crossrefs that have already been
+ // retrieved. this happens for cases where a database record doesn't yield
+ // protein products for CDS
+ DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
+ for (SequenceI sq : dataset.getSequences())
+ {
+ boolean dupeFound = false;
+ // !fromDna means we are looking only for nucleotide sequences, not
+ // protein
+ if (sq.isProtein() == fromDna)
+ {
+ for (DBRefEntry dbr : sq.getPrimaryDBRefs())
+ {
+ for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr))
+ {
+ sourceRefs.remove(found);
+ dupeFound = true;
+ }
+ }
+ }
+ if (dupeFound)
+ {
+ dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
+ }
+ }
+ if (sourceRefs.size() == 0)
+ {
+ // no more work to do! We already had all requested sequence records in
+ // the dataset.
+ return;
+ }
try
{
retrieved = sftch.getSequences(sourceRefs, !fromDna);
}
else
{
- matcher.add(map.getTo());
+ if (dataset.findIndex(map.getTo()) == -1)
+ {
+ dataset.addSequence(map.getTo());
+ matcher.add(map.getTo());
+ }
}
try
{
}
retrievedSequence.updatePDBIds();
rseqs.add(retrievedDss);
- dataset.addSequence(retrievedDss);
- matcher.add(retrievedDss);
+ if (dataset.findIndex(retrievedDss) == -1)
+ {
+ dataset.addSequence(retrievedDss);
+ matcher.add(retrievedDss);
+ }
}
}
}
DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna)
{
MapList mapping = null;
-
+ SequenceI dsmapFrom = mapFrom.getDatasetSequence() == null ? mapFrom
+ : mapFrom.getDatasetSequence();
+ SequenceI dsmapTo = mapTo.getDatasetSequence() == null ? mapTo
+ : mapTo.getDatasetSequence();
/*
- * look for a reverse mapping, if found make its inverse
+ * look for a reverse mapping, if found make its inverse.
+ * Note - we do this on dataset sequences only.
*/
- if (mapTo.getDBRefs() != null)
+ if (dsmapTo.getDBRefs() != null)
{
- for (DBRefEntry dbref : mapTo.getDBRefs())
+ for (DBRefEntry dbref : dsmapTo.getDBRefs())
{
String name = dbref.getSource() + "|" + dbref.getAccessionId();
- if (dbref.hasMap() && mapFrom.getName().startsWith(name))
+ if (dbref.hasMap() && dsmapFrom.getName().startsWith(name))
{
/*
* looks like we've found a map from 'mapTo' to 'mapFrom'
* - invert it to make the mapping the other way
*/
MapList reverse = dbref.getMap().getMap().getInverse();
- xref.setMap(new Mapping(mapTo, reverse));
- mappings.addMap(mapFrom, mapTo, reverse);
+ xref.setMap(new Mapping(dsmapTo, reverse));
+ mappings.addMap(mapFrom, dsmapTo, reverse);
return true;
}
}
/*
* and add a reverse DbRef with the inverse mapping
*/
- if (mapFrom.getDatasetSequence() != null
- && mapFrom.getDatasetSequence().getSourceDBRef() != null)
- {
- DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence()
- .getSourceDBRef());
- dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping
- .getInverse()));
- mapTo.addDBRef(dbref);
+ if (mapFrom.getDatasetSequence() != null && false)
+ // && mapFrom.getDatasetSequence().getSourceDBRef() != null)
+ {
+ // possible need to search primary references... except, why doesn't xref
+ // == getSourceDBRef ??
+ // DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence()
+ // .getSourceDBRef());
+ // dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping
+ // .getInverse()));
+ // mapTo.addDBRef(dbref);
}
if (fromDna)
* </ul>
* @return true if relationship found and sequence added.
*/
- boolean searchDataset(boolean fromDna, SequenceI fromSeq,
- DBRefEntry xrf, List<SequenceI> foundSeqs, AlignedCodonFrame mappings,
+ boolean searchDataset(boolean fromDna, SequenceI fromSeq, DBRefEntry xrf,
+ List<SequenceI> foundSeqs, AlignedCodonFrame mappings,
boolean direct)
{
boolean found = false;
// }
if (!cands.isEmpty())
{
- if (!foundSeqs.contains(nxt))
+ if (foundSeqs.contains(nxt))
{
- found = true;
- foundSeqs.add(nxt);
- if (mappings != null && !direct)
+ continue;
+ }
+ found = true;
+ foundSeqs.add(nxt);
+ if (mappings != null && !direct)
+ {
+ /*
+ * if the matched sequence has mapped dbrefs to
+ * protein product / cdna, add equivalent mappings to
+ * our source sequence
+ */
+ for (DBRefEntry candidate : cands)
{
- /*
- * if the matched sequence has mapped dbrefs to
- * protein product / cdna, add equivalent mappings to
- * our source sequence
- */
- for (DBRefEntry candidate : cands)
+ Mapping mapping = candidate.getMap();
+ if (mapping != null)
{
- Mapping mapping = candidate.getMap();
- if (mapping != null)
+ MapList map = mapping.getMap();
+ if (mapping.getTo() != null
+ && map.getFromRatio() != map.getToRatio())
{
- MapList map = mapping.getMap();
- if (mapping.getTo() != null
- && map.getFromRatio() != map.getToRatio())
+ /*
+ * add a mapping, as from dna to peptide sequence
+ */
+ if (map.getFromRatio() == 3)
{
- /*
- * add a mapping, as from dna to peptide sequence
- */
- if (map.getFromRatio() == 3)
- {
- mappings.addMap(nxt, fromSeq, map);
- }
- else
- {
- mappings.addMap(nxt, fromSeq, map.getInverse());
- }
+ mappings.addMap(nxt, fromSeq, map);
+ }
+ else
+ {
+ mappings.addMap(nxt, fromSeq, map.getInverse());
}
}
}
* @return
*/
public boolean updateFrom(DBRefEntryI otherEntry);
+
+ /**
+ * Method to distinguish between direct and indirect database references.
+ *
+ * Primary references indicate the local sequence data directly corresponds
+ * with the database record. All other references are secondary. Direct
+ * references indicate that part or all of the local sequence data can be
+ * mapped with another sequence, enabling annotation transfer.
+ * Cross-references indicate the local sequence data can be corresponded to
+ * some other linear coordinate system via a transformation.
+ *
+ * This method is also sufficient to distinguish direct DBRefEntry mappings
+ * from other relationships - e.g. coding relationships (imply a 1:3/3:1
+ * mapping), but not transcript relationships, which imply a (possibly
+ * non-contiguous) 1:1 mapping.
+ *
+ * The only way a dbref's mappings can be fully verified is via the local
+ * sequence frame, so rather than use isPrimary directly, please use
+ * SequenceI.getPrimaryDBRefs()
+ *
+ * @return true if this reference provides a primary accession for the
+ * associated sequence object
+ */
+ public boolean isPrimary();
}
package jalview.datamodel;
import jalview.analysis.AlignmentUtils;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
import jalview.io.FastaFile;
import jalview.util.Comparison;
import jalview.util.MessageManager;
{
if (dataset != null)
{
+
// maintain dataset integrity
- if (snew.getDatasetSequence() != null)
- {
- getDataset().addSequence(snew.getDatasetSequence());
- }
- else
+ SequenceI dsseq = snew.getDatasetSequence();
+ if (dsseq == null)
{
// derive new sequence
SequenceI adding = snew.deriveSequence();
- getDataset().addSequence(adding.getDatasetSequence());
snew = adding;
+ dsseq = snew.getDatasetSequence();
}
+ if (getDataset().findIndex(dsseq) == -1)
+ {
+ getDataset().addSequence(dsseq);
+ }
+
}
if (sequences == null)
{
}
}
- /**
- * Adds a sequence to the alignment. Recalculates maxLength and size.
- *
- * @param snew
- */
+ /**
+ * Puts snew at index i of this alignment's sequence list. If i is an
+ * existing index the sequence there is replaced and returned; otherwise
+ * (i is at or beyond the current size) snew is appended to the end and
+ * null is returned.
+ *
+ * @param i
+ * index of the sequence to replace
+ * @param snew
+ * the sequence to store
+ * @return the sequence previously at index i, or null if snew was appended
+ */
@Override
- public void setSequenceAt(int i, SequenceI snew)
+ public SequenceI replaceSequenceAt(int i, SequenceI snew)
{
synchronized (sequences)
{
- deleteSequence(i);
- sequences.set(i, snew);
+ if (sequences.size() > i)
+ {
+ // index exists: swap in place and hand back the displaced sequence
+ return sequences.set(i, snew);
+
+ }
+ else
+ {
+ // index is past the end: append instead, and tell hiddenSequences that
+ // a sequence was added
+ sequences.add(snew);
+ hiddenSequences.adjustHeightSequenceAdded();
+ }
+ return null;
}
}
}
/**
+   * Adds the dataset sequence for currentSeq to seqs, together with every
+   * sequence reachable from it by following the 'to' sequence of mapped
+   * database references (breadth first). Sequences already in seqs are not
+   * visited again.
+   * 
+   * @param currentSeq
+   *          sequence to resolve; its dataset sequence is used if it has one
+   * @param seqs
+   *          the collection being built up
+   * @param createDatasetSequence
+   *          if true and currentSeq has no dataset sequence, one is created
+   *          with createDatasetSequence(); if false, currentSeq itself is
+   *          added
+   * @throws Error
+   *           if a mapped database reference points to a sequence that itself
+   *           has a dataset sequence
+   */
+  private void resolveAndAddDatasetSeq(SequenceI currentSeq,
+          Set<SequenceI> seqs, boolean createDatasetSequence)
+  {
+    if (currentSeq.getDatasetSequence() != null)
+    {
+      currentSeq = currentSeq.getDatasetSequence();
+    }
+    else if (createDatasetSequence)
+    {
+      currentSeq = currentSeq.createDatasetSequence();
+    }
+    if (seqs.contains(currentSeq))
+    {
+      return;
+    }
+    // FIFO work queue: a deque removes from the head in constant time,
+    // whereas ArrayList.remove(0) shifts every remaining element
+    java.util.Deque<SequenceI> toProcess = new java.util.ArrayDeque<SequenceI>();
+    toProcess.add(currentSeq);
+    while (!toProcess.isEmpty())
+    {
+      SequenceI curDs = toProcess.poll();
+      if (seqs.contains(curDs))
+      {
+        // queued more than once before it was processed
+        continue;
+      }
+      seqs.add(curDs);
+      // iterate over database references, making sure we add forward
+      // referenced sequences
+      if (curDs.getDBRefs() == null)
+      {
+        continue;
+      }
+      for (DBRefEntry dbr : curDs.getDBRefs())
+      {
+        if (dbr.getMap() == null || dbr.getMap().getTo() == null)
+        {
+          continue;
+        }
+        if (dbr.getMap().getTo().getDatasetSequence() != null)
+        {
+          throw new Error("Implementation error: Map.getTo() for dbref"
+                  + dbr + " is not a dataset sequence.");
+          // TODO: if this happens, could also rewrite the reference to
+          // point to new dataset sequence
+        }
+        // queue the mapped sequence so its own forward references are
+        // followed too
+        toProcess.add(dbr.getMap().getTo());
+      }
+    }
+  }
+
+ /**
* Creates a new dataset for this alignment. Can only be done once - if
* dataset is not null this will not be performed.
*/
{
return;
}
- SequenceI[] seqs = new SequenceI[getHeight()];
- SequenceI currentSeq;
+ // try to avoid using SequenceI.equals at this stage, it will be expensive
+ Set<SequenceI> seqs = new jalview.util.LinkedIdentityHashSet<SequenceI>();
+
for (int i = 0; i < getHeight(); i++)
{
- currentSeq = getSequenceAt(i);
- if (currentSeq.getDatasetSequence() != null)
- {
- seqs[i] = currentSeq.getDatasetSequence();
- }
- else
+ SequenceI currentSeq = getSequenceAt(i);
+ resolveAndAddDatasetSeq(currentSeq, seqs, true);
+ }
+
+ // verify all mappings are in dataset
+ for (AlignedCodonFrame cf : codonFrameList)
+ {
+ for (SequenceToSequenceMapping ssm : cf.getMappings())
{
- seqs[i] = currentSeq.createDatasetSequence();
+ if (!seqs.contains(ssm.getFromSeq()))
+ {
+ resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false);
+ }
+ if (!seqs.contains(ssm.getMapping().getTo()))
+ {
+ resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false);
+ }
}
}
-
- dataset = new Alignment(seqs);
+ // finally construct dataset
+ dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
// move mappings to the dataset alignment
dataset.codonFrameList = this.codonFrameList;
this.codonFrameList = null;
* Used to set a particular index of the alignment with the given sequence.
*
* @param i
- * Index of sequence to be updated.
+ * Index of sequence to be updated. if i>length, sequence will be
+ * added to end, with no intervening positions.
* @param seq
- * New sequence to be inserted.
+ * New sequence to be inserted. The existing sequence at position i
+ * will be replaced.
+ * @return existing sequence (or null if i>current length)
*/
- void setSequenceAt(int i, SequenceI seq);
+ SequenceI replaceSequenceAt(int i, SequenceI seq);
/**
* Deletes a sequence from the alignment
import jalview.api.DBRefEntryI;
+import java.util.Arrays;
+
public class DBRefEntry implements DBRefEntryI
{
String source = "", version = "", accessionId = "";
+
/**
* maps from associated sequence to the database sequence's coordinate system
*/
}
-
public DBRefEntry(String source, String version, String accessionId)
{
this(source, version, accessionId, null);
String otherAccession = other.getAccessionId();
if ((accessionId == null && otherAccession != null)
|| (accessionId != null && otherAccession == null)
- || (accessionId != null && !accessionId.equalsIgnoreCase(otherAccession)))
+ || (accessionId != null && !accessionId
+ .equalsIgnoreCase(otherAccession)))
{
return false;
}
* otherwise the versions have to match
*/
String otherVersion = other.getVersion();
-
+
if ((version == null || version.equals("0") || version.endsWith(":0"))
&& otherVersion != null)
{
return accessionId;
}
-
@Override
public void setAccessionId(String accessionId)
{
this.accessionId = accessionId;
}
-
@Override
public void setSource(String source)
{
this.source = source;
}
-
@Override
public void setVersion(String version)
{
this.version = version;
}
-
@Override
public Mapping getMap()
{
{
return getSrcAccString();
}
+
+  /**
+   * Answers true if this reference looks like a primary accession for the
+   * sequence that holds it. A reference with a map is rejected if the map has
+   * a target sequence, is not 1:1, or does not map every range onto itself. A
+   * reference with no version is rejected, as is one whose version string
+   * starts with the name of any source listed by DBRefSource.allSources().
+   * 
+   * @return true if none of the above exclusions applies
+   */
+  @Override
+  public boolean isPrimary()
+  {
+    /*
+     * if a map is present, unless it is 1:1 and has no SequenceI mate, it cannot be a primary reference.
+     */
+    if (map != null)
+    {
+      if (map.getTo() != null)
+      {
+        return false;
+      }
+      if (map.getMap().getFromRatio() != map.getMap().getToRatio()
+              || map.getMap().getFromRatio() != 1)
+      {
+        return false;
+      }
+      // check map is really 1:1, no shifts allowed: a difference in either
+      // end of the extent, or in any individual range, disqualifies it.
+      // deepEquals is needed because the ranges are int[] elements, which
+      // Arrays.equals would compare by identity rather than by content.
+      if (map.getMap().getFromHighest() != map.getMap().getToHighest()
+              || map.getMap().getFromLowest() != map.getMap().getToLowest()
+              || !Arrays.deepEquals(
+                      map.getMap().getFromRanges().toArray(new int[0][]),
+                      map.getMap().getToRanges().toArray(new int[0][])))
+      {
+        return false;
+      }
+    }
+    if (version == null)
+    {
+      // no version string implies the reference has not been verified at all.
+      return false;
+    }
+    // tricky - this test really needs to search the sequence's set of dbrefs to
+    // see if there is a primary reference that derived this reference.
+    // Locale.ROOT keeps the case conversion independent of the user's locale.
+    String ucv = version.toUpperCase(java.util.Locale.ROOT);
+    for (String primsrc : DBRefSource.allSources())
+    {
+      if (ucv.startsWith(primsrc.toUpperCase(java.util.Locale.ROOT)))
+      {
+        // by convention, many secondary references inherit the primary
+        // reference's source string as a prefix for any version information
+        // from the secondary reference.
+        return false;
+      }
+    }
+    return true;
+  }
}
*/
package jalview.datamodel;
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* Defines internal constants for unambiguous annotation of DbRefEntry source
* strings and describing the data retrieved from external database sources (see
/**
* UNIPROT Accession Number
*/
- public static String UNIPROT = "UNIPROT";
+ public static final String UNIPROT = "UNIPROT";
/**
* UNIPROT Entry Name
*/
- public static String UP_NAME = "UNIPROT_NAME".toUpperCase();
+ public static final String UP_NAME = "UNIPROT_NAME".toUpperCase();
/**
* Uniprot Knowledgebase/TrEMBL as served from EMBL protein products.
/**
* PDB Entry Code
*/
- public static String PDB = "PDB";
+ public static final String PDB = "PDB";
/**
* EMBL ID
*/
- public static String EMBL = "EMBL";
+ public static final String EMBL = "EMBL";
/**
* EMBLCDS ID
*/
- public static String EMBLCDS = "EMBLCDS";
+ public static final String EMBLCDS = "EMBLCDS";
/**
* PFAM ID
*/
- public static String PFAM = "PFAM";
+ public static final String PFAM = "PFAM";
/**
* RFAM ID
*/
- public static String RFAM = "RFAM";
+ public static final String RFAM = "RFAM";
/**
* GeneDB ID
public static final String[] CODINGDBS = { EMBLCDS, GENEDB, ENSEMBL };
- public static final String[] PROTEINDBS = { UNIPROT, PDB, UNIPROTKB,
+ public static final String[] PROTEINDBS = { UNIPROT, UNIPROTKB,
EMBLCDSProduct, ENSEMBL }; // Ensembl ENSP* entries are protein
+
+  /**
+   * Collects, by reflection, the values of all public fields of this class
+   * that are declared with type String. A field whose value cannot be read is
+   * skipped after printing the exception's stack trace.
+   * 
+   * @return the field values, in the order the fields are reported by
+   *         reflection
+   */
+  public static String[] allSources()
+  {
+    List<String> sources = new ArrayList<String>();
+    Field[] publicFields = DBRefSource.class.getFields();
+    for (int i = 0; i < publicFields.length; i++)
+    {
+      Field field = publicFields[i];
+      if (!String.class.equals(field.getType()))
+      {
+        // arrays and other non-String members are not source names
+        continue;
+      }
+      try
+      {
+        Object value = field.get(null);
+        sources.add((String) value);
+      } catch (Exception x)
+      {
+        x.printStackTrace();
+      }
+    }
+    return sources.toArray(new String[sources.size()]);
+  }
}
import jalview.analysis.AlignSeq;
import jalview.api.DBRefEntryI;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
import jalview.util.StringUtils;
import java.util.ArrayList;
String vamsasId;
- DBRefEntryI sourceDBRef;
-
DBRefEntry[] dbrefs;
RNA rna;
seq.getEnd());
}
description = seq.getDescription();
- sourceDBRef = seq.getSourceDBRef() == null ? null : new DBRefEntry(
- seq.getSourceDBRef());
if (seq != datasetSequence)
{
setDatasetSequence(seq.getDatasetSequence());
&& datasetSequence.getSequenceFeatures() != null
&& datasetSequence.getSequenceFeatures().length > 0)
{
- System.err
- .println("Warning: JAL-2046 side effect ? Possible implementation error: overwriting dataset sequence features by setting sequence features on alignment");
+ new Exception(
+ "Warning: JAL-2046 side effect ? Possible implementation error: overwriting dataset sequence features by setting sequence features on alignment")
+ .printStackTrace();
}
datasetSequence.setSequenceFeatures(features);
}
@Override
public PDBEntry getPDBEntry(String pdbIdStr)
{
- if (getDatasetSequence() == null
- || getDatasetSequence().getAllPDBEntries() == null)
+ if (getDatasetSequence() != null)
+ {
+ return getDatasetSequence().getPDBEntry(pdbIdStr);
+ }
+ if (pdbIds == null)
{
return null;
}
- List<PDBEntry> entries = getDatasetSequence().getAllPDBEntries();
+ List<PDBEntry> entries = getAllPDBEntries();
for (PDBEntry entry : entries)
{
if (entry.getId().equalsIgnoreCase(pdbIdStr))
return null;
}
- @Override
- public void setSourceDBRef(DBRefEntryI dbRef)
- {
- this.sourceDBRef = dbRef;
- }
+ /**
+ * Returns the database references that are primary for this sequence's
+ * data. Delegates to the dataset sequence when there is one. Otherwise a
+ * reference is kept only if it reports isPrimary(), any map it carries
+ * spans at least start..end of this sequence, and it passes a type check:
+ * a PDB reference needs a PDBEntry of type "PDB" for the same accession;
+ * any other reference must be returned by DBRefUtils.selectDbRefs when
+ * called with !isProtein().
+ *
+ * @return the primary references, or an empty list if there are none
+ */
@Override
- public DBRefEntryI getSourceDBRef()
+ public List<DBRefEntry> getPrimaryDBRefs()
{
- return this.sourceDBRef;
+ if (datasetSequence != null)
+ {
+ return datasetSequence.getPrimaryDBRefs();
+ }
+ if (dbrefs == null || dbrefs.length == 0)
+ {
+ return Arrays.asList(new DBRefEntry[0]);
+ }
+ synchronized (dbrefs)
+ {
+ List<DBRefEntry> primaries = new ArrayList<DBRefEntry>();
+ // one-element scratch array, reused to test each reference in turn
+ DBRefEntry tmp[] = new DBRefEntry[1], res[] = null;
+ for (DBRefEntry ref : dbrefs)
+ {
+ if (!ref.isPrimary())
+ {
+ continue;
+ }
+ if (ref.hasMap())
+ {
+ MapList mp = ref.getMap().getMap();
+ if (mp.getFromLowest() > start || mp.getFromHighest() < end)
+ {
+ // map only involves a subsequence, so cannot be primary
+ continue;
+ }
+ }
+ // whilst it looks like it is a primary ref, we also sanity check type
+ if (DBRefUtils.getCanonicalName(DBRefSource.PDB).equals(
+ DBRefUtils.getCanonicalName(ref.getSource())))
+ {
+ // PDB dbrefs imply there should be a PDBEntry associated
+ // TODO: tighten PDB dbrefs
+ // formally imply Jalview has actually downloaded and
+ // parsed the pdb file. That means there should be a cached file
+ // handle on the PDBEntry, and a real mapping between sequence and
+ // extracted sequence from PDB file
+ PDBEntry pdbentry = getPDBEntry(ref.getAccessionId());
+ if (pdbentry != null && pdbentry.getType() != null
+ && pdbentry.getType().equalsIgnoreCase("PDB"))
+ {
+ primaries.add(ref);
+ }
+ continue;
+ }
+ // check standard protein or dna sources
+ tmp[0] = ref;
+ res = DBRefUtils.selectDbRefs(!isProtein(), tmp);
+ // guard against an empty (not just null) result before reading res[0]
+ if (res != null && res.length > 0 && res[0] == tmp[0])
+ {
+ primaries.add(ref);
+ }
+ }
+ return primaries;
+ }
}
}
*/
package jalview.datamodel;
-import jalview.api.DBRefEntryI;
-
import java.util.List;
import java.util.Vector;
*/
public PDBEntry getPDBEntry(String pdbId);
- /**
- * Set the distinct source database, and accession number from which a
- * sequence and its start-end data were derived from. This is very important
- * for SIFTS mappings and must be set prior to performing SIFTS mapping.
- *
- * @param dbRef
- * the source dbRef for the sequence
- */
- public void setSourceDBRef(DBRefEntryI dbRef);
/**
- * Get the distinct source database, and accession number from which a
- * sequence and its start-end data were derived from.
+ * Get all primary database/accessions for this sequence's data. These
+ * DBRefEntry are expected to resolve to a valid record in the associated
+ * external database, either directly or via a provided 1:1 Mapping.
*
- * @return
+ * @return just the primary references (if any) for this sequence, or an empty
+ * list
*/
- public DBRefEntryI getSourceDBRef();
+ public List<DBRefEntry> getPrimaryDBRefs();
}
DBRefEntry retrievedref = new DBRefEntry(sourceDb,
getSequenceVersion(), accession);
dna.addDBRef(retrievedref);
- dna.setSourceDBRef(retrievedref);
// add map to indicate the sequence is a valid coordinate frame for the
// dbref
retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() },
dnaToProteinMapping.setTo(proteinSeq);
dnaToProteinMapping.setMappedFromId(proteinId);
proteinSeq.addDBRef(proteinDbRef);
- proteinSeq.setSourceDBRef(proteinDbRef);
ref.setMap(dnaToProteinMapping);
}
hasUniprotDbref = true;
DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId);
}
product.addDBRef(proteinToEmblProteinRef);
- product.setSourceDBRef(proteinToEmblProteinRef);
if (dnaToProteinMapping != null
&& dnaToProteinMapping.getTo() != null)
*/
else
{
- List<String> ids = new EnsemblSymbol(getDomain()).getIds(acc);
+ List<String> ids = new EnsemblSymbol(getDomain(), getDbSource(),
+ getDbVersion()).getIds(acc);
for (String geneId : ids)
{
if (!geneIds.contains(geneId))
*/
protected String getGeneIdentifiersForName(String query)
{
- List<String> ids = new EnsemblSymbol(getDomain()).getIds(query);
+ List<String> ids = new EnsemblSymbol(getDomain(), getDbSource(),
+ getDbVersion()).getIds(query);
if (ids != null)
{
for (String id : ids)
{
// clunky: ensure Uniprot xref if we have one is on mapped sequence
SequenceI ds = proteinSeq.getDatasetSequence();
- ds.setSourceDBRef(proteinSeq.getSourceDBRef());
-
+ // TODO: Verify ensp primary ref is on proteinSeq.getDatasetSequence()
Mapping map = new Mapping(ds, mapList);
DBRefEntry dbr = new DBRefEntry(getDbSource(),
getEnsemblDataVersion(), proteinSeq.getName(), map);
seq = seq.getDatasetSequence();
}
- EnsemblXref xrefFetcher = new EnsemblXref(getDomain());
+ EnsemblXref xrefFetcher = new EnsemblXref(getDomain(), getDbSource(),
+ getEnsemblDataVersion());
List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(seq.getName());
for (DBRefEntry xref : xrefs)
{
DBRefEntry self = new DBRefEntry(getDbSource(),
getEnsemblDataVersion(), seq.getName());
seq.addDBRef(self);
- seq.setSourceDBRef(self);
}
/**
{
DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(),
getEnsemblDataVersion(), name);
- sq.setSourceDBRef(dbref);
+ sq.addDBRef(dbref);
}
}
if (alignment == null)
/**
* Constructor given the target domain to fetch data from
*
- * @param d
+ * @param domain
+ * @param dbName
+ * @param dbVersion
*/
- public EnsemblSymbol(String d)
+ public EnsemblSymbol(String domain, String dbName, String dbVersion)
{
- super(d);
+ super(domain, dbName, dbVersion);
}
/**
private static final String GO_GENE_ONTOLOGY = "GO";
+ private String dbName = "ENSEMBL (xref)";
+
/**
* Constructor given the target domain to fetch data from
*
* @param d
*/
- public EnsemblXref(String d)
+ public EnsemblXref(String d, String dbSource, String version)
{
super(d);
+ dbName = dbSource;
+ xrefVersion = dbSource + ":" + version;
+
}
@Override
public String getDbName()
{
- return "ENSEMBL (xref)";
+ return dbName;
}
@Override
if (dbName != null && id != null)
{
dbName = DBRefUtils.getCanonicalName(dbName);
- DBRefEntry dbref = new DBRefEntry(dbName, "0", id);
+ DBRefEntry dbref = new DBRefEntry(dbName, getXRefVersion(), id);
result.add(dbref);
}
}
return result;
}
+ private String xrefVersion = "ENSEMBL:0";
+
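+  /**
+   * Version string given to the cross-references created by this fetcher. The
+   * initial value (as hardwired for 2.10) is "ENSEMBL:0"; the constructor
+   * replaces it with "&lt;dbSource&gt;:&lt;version&gt;".
+   * 
+   * @return the version string used for generated xrefs
+   */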
+ public String getXRefVersion()
+ {
+ return xrefVersion;
+ }
+
/**
* Returns the URL for the REST endpoint to fetch all cross-references for an
* identifier. Note this may return protein cross-references for nucleotide.
import jalview.api.AlignViewportI;
import jalview.api.AlignmentViewPanel;
import jalview.api.FeatureSettingsControllerI;
-import jalview.api.FeatureSettingsModelI;
import jalview.api.SplitContainerI;
import jalview.api.ViewStyleI;
import jalview.api.analysis.ScoreModelI;
import jalview.datamodel.AlignmentOrder;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.ColumnSelection;
-import jalview.datamodel.DBRefSource;
import jalview.datamodel.HiddenSequences;
import jalview.datamodel.PDBEntry;
import jalview.datamodel.SeqCigar;
import jalview.io.JnetAnnotationMaker;
import jalview.io.NewickFile;
import jalview.io.TCoffeeScoreFile;
-import jalview.io.gff.SequenceOntologyI;
import jalview.jbgui.GAlignFrame;
import jalview.schemes.Blosum62ColourScheme;
import jalview.schemes.BuriedColourScheme;
import jalview.schemes.TurnColourScheme;
import jalview.schemes.UserColourScheme;
import jalview.schemes.ZappoColourScheme;
-import jalview.structure.StructureSelectionManager;
import jalview.util.MessageManager;
import jalview.viewmodel.AlignmentViewport;
import jalview.ws.DBRefFetcher;
import jalview.ws.DBRefFetcher.FetchFinishedListenerI;
-import jalview.ws.SequenceFetcher;
import jalview.ws.jws1.Discoverer;
import jalview.ws.jws2.Jws2Discoverer;
import jalview.ws.jws2.jabaws2.Jws2Instance;
protected void showProductsFor(final SequenceI[] sel,
final boolean _odna, final String source)
{
- Runnable foo = new Runnable()
- {
-
- @Override
- public void run()
- {
- final long sttime = System.currentTimeMillis();
- AlignFrame.this.setProgressBar(MessageManager.formatMessage(
- "status.searching_for_sequences_from",
- new Object[] { source }), sttime);
- try
- {
- AlignmentI alignment = AlignFrame.this.getViewport()
- .getAlignment();
- AlignmentI dataset = alignment.getDataset() == null ? alignment
- : alignment.getDataset();
- boolean dna = alignment.isNucleotide();
- if (_odna != dna)
- {
- System.err
- .println("Conflict: showProducts for alignment originally "
- + "thought to be "
- + (_odna ? "DNA" : "Protein")
- + " now searching for "
- + (dna ? "DNA" : "Protein") + " Context.");
- }
- AlignmentI xrefs = new CrossRef(sel, dataset).findXrefSequences(
- source, dna);
- if (xrefs == null)
- {
- return;
- }
- /*
- * get display scheme (if any) to apply to features
- */
- FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
- .getFeatureColourScheme(source);
-
- AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
- xrefs);
- if (!dna)
- {
- xrefsAlignment = AlignmentUtils.makeCdsAlignment(
- xrefsAlignment.getSequencesArray(), dataset, sel);
- xrefsAlignment.alignAs(alignment);
- }
-
- /*
- * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset
- * sequences). If we are DNA, drop introns and update mappings
- */
- AlignmentI copyAlignment = null;
-
- if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
- {
- boolean copyAlignmentIsAligned = false;
- if (dna)
- {
- copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
- xrefsAlignment.getSequencesArray());
- if (copyAlignment.getHeight() == 0)
- {
- JOptionPane.showMessageDialog(AlignFrame.this,
- MessageManager.getString("label.cant_map_cds"),
- MessageManager.getString("label.operation_failed"),
- JOptionPane.OK_OPTION);
- System.err.println("Failed to make CDS alignment");
- }
-
- /*
- * pending getting Embl transcripts to 'align',
- * we are only doing this for Ensembl
- */
- // TODO proper criteria for 'can align as cdna'
- if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
- || AlignmentUtils.looksLikeEnsembl(alignment))
- {
- copyAlignment.alignAs(alignment);
- copyAlignmentIsAligned = true;
- }
- }
- else
- {
- copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
- xrefs.getSequencesArray(), dataset);
- }
- copyAlignment.setGapCharacter(AlignFrame.this.viewport
- .getGapCharacter());
-
- StructureSelectionManager ssm = StructureSelectionManager
- .getStructureSelectionManager(Desktop.instance);
-
- /*
- * register any new mappings for sequence mouseover etc
- * (will not duplicate any previously registered mappings)
- */
- ssm.registerMappings(dataset.getCodonFrames());
-
- if (copyAlignment.getHeight() <= 0)
- {
- System.err.println("No Sequences generated for xRef type "
- + source);
- return;
- }
- /*
- * align protein to dna
- */
- if (dna && copyAlignmentIsAligned)
- {
- xrefsAlignment.alignAs(copyAlignment);
- }
- else
- {
- /*
- * align cdna to protein - currently only if
- * fetching and aligning Ensembl transcripts!
- */
- // TODO: generalise for other sources of locus/transcript/cds data
- if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
- {
- copyAlignment.alignAs(xrefsAlignment);
- }
- }
- }
- /*
- * build AlignFrame(s) according to available alignment data
- */
- AlignFrame newFrame = new AlignFrame(xrefsAlignment,
- DEFAULT_WIDTH, DEFAULT_HEIGHT);
- if (Cache.getDefault("HIDE_INTRONS", true))
- {
- newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
- }
- String newtitle = String.format("%s %s %s",
- dna ? MessageManager.getString("label.proteins")
- : MessageManager.getString("label.nucleotides"),
- MessageManager.getString("label.for"), getTitle());
- newFrame.setTitle(newtitle);
-
- if (copyAlignment == null)
- {
- /*
- * split frame display is turned off in preferences file
- */
- Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH,
- DEFAULT_HEIGHT);
- return; // via finally clause
- }
- AlignFrame copyThis = new AlignFrame(copyAlignment,
- AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
- copyThis.setTitle(AlignFrame.this.getTitle());
-
- boolean showSequenceFeatures = viewport.isShowSequenceFeatures();
- newFrame.setShowSeqFeatures(showSequenceFeatures);
- copyThis.setShowSeqFeatures(showSequenceFeatures);
- FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas
- .getFeatureRenderer();
-
- /*
- * copy feature rendering settings to split frame
- */
- newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
- .transferSettings(myFeatureStyling);
- copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
- .transferSettings(myFeatureStyling);
-
- /*
- * apply 'database source' feature configuration
- * if any was found
- */
- // TODO is this the feature colouring for the original
- // alignment or the fetched xrefs? either could be Ensembl
- newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
- copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
-
- SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
- dna ? newFrame : copyThis);
- newFrame.setVisible(true);
- copyThis.setVisible(true);
- String linkedTitle = MessageManager
- .getString("label.linked_view_title");
- Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
- sf.adjustDivider();
- } catch (OutOfMemoryError e)
- {
- new OOMWarning("whilst fetching crossreferences", e);
- } catch (Throwable e)
- {
- Cache.log.error("Error when finding crossreferences", e);
- } finally
- {
- AlignFrame.this.setProgressBar(MessageManager.formatMessage(
- "status.finished_searching_for_sequences_from",
- new Object[] { source }), sttime);
- }
- }
-
- /**
- * Makes an alignment containing the given sequences, and adds them to the
- * given dataset, which is also set as the dataset for the new alignment
- *
- * TODO: refactor to DatasetI method
- *
- * @param dataset
- * @param seqs
- * @return
- */
- protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
- AlignmentI seqs)
- {
- SequenceI[] sprods = new SequenceI[seqs.getHeight()];
- for (int s = 0; s < sprods.length; s++)
- {
- sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
- if (dataset.getSequences() == null
- || !dataset.getSequences().contains(
- sprods[s].getDatasetSequence()))
- {
- dataset.addSequence(sprods[s].getDatasetSequence());
- }
- sprods[s].updatePDBIds();
- }
- Alignment al = new Alignment(sprods);
- al.setDataset(dataset);
- return al;
- }
-
- };
- Thread frunner = new Thread(foo);
- frunner.start();
+ new Thread(CrossRefAction.showProductsFor(sel, _odna, source, this))
+ .start();
}
/**
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.gui;
+
+import jalview.analysis.AlignmentUtils;
+import jalview.analysis.CrossRef;
+import jalview.api.AlignmentViewPanel;
+import jalview.api.FeatureSettingsModelI;
+import jalview.bin.Cache;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.SequenceI;
+import jalview.io.gff.SequenceOntologyI;
+import jalview.structure.StructureSelectionManager;
+import jalview.util.MessageManager;
+import jalview.ws.SequenceFetcher;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.swing.JOptionPane;
+
+/**
+ * Factory constructor and runnable for discovering and displaying
+ * cross-references for a set of aligned sequences
+ *
+ * @author jprocter
+ *
+ */
+public class CrossRefAction implements Runnable
+{
+ private AlignFrame alignFrame;
+
+ private SequenceI[] sel;
+
+ private boolean _odna;
+
+ private String source;
+
+ List<AlignmentViewPanel> xrefViews = new ArrayList<AlignmentViewPanel>();
+
+  /**
+   * Returns the list of alignment panels recorded by {@link #run()}. The
+   * list is the live internal list: it is empty until run() has opened a
+   * view, and then holds either the single new panel, or the top and bottom
+   * panels (in that order) of the split frame.
+   * 
+   * @return the views opened for the cross-references (possibly empty)
+   */
+  public List<AlignmentViewPanel> getXrefViews()
+  {
+    return this.xrefViews;
+  }
+
+  /**
+   * Finds sequences cross-referenced from the selected sequences in the given
+   * source, and shows them in a new alignment frame. If split frames are
+   * enabled in preferences, the new alignment is shown in a split frame
+   * together with a copy (or CDS version) of the selected sequences. The
+   * alignment panels that get opened are recorded in xrefViews. Progress is
+   * reported on the originating alignment frame's progress bar.
+   */
+  @Override
+  public void run()
+  {
+    final long sttime = System.currentTimeMillis();
+    alignFrame.setProgressBar(
+            MessageManager.formatMessage(
+                    "status.searching_for_sequences_from",
+                    new Object[] { source }), sttime);
+    try
+    {
+      AlignmentI alignment = alignFrame.getViewport().getAlignment();
+      // fall back on the alignment itself if it has no separate dataset
+      AlignmentI dataset = alignment.getDataset() == null ? alignment
+              : alignment.getDataset();
+      boolean dna = alignment.isNucleotide();
+      if (_odna != dna)
+      {
+        // the alignment type differs from what the caller expected; report
+        // it but carry on using the type of the alignment as it is now
+        System.err
+                .println("Conflict: showProducts for alignment originally "
+                        + "thought to be " + (_odna ? "DNA" : "Protein")
+                        + " now searching for " + (dna ? "DNA" : "Protein")
+                        + " Context.");
+      }
+      AlignmentI xrefs = new CrossRef(sel, dataset).findXrefSequences(
+              source, dna);
+      if (xrefs == null)
+      {
+        // nothing found; the finally clause still clears the progress bar
+        return;
+      }
+      /*
+       * get display scheme (if any) to apply to features
+       */
+      FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
+              .getFeatureColourScheme(source);
+
+      AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
+              xrefs);
+      if (!dna)
+      {
+        // protein selected, so the cross-references are nucleotide:
+        // reduce them to CDS and align that to the selected alignment
+        xrefsAlignment = AlignmentUtils.makeCdsAlignment(
+                xrefsAlignment.getSequencesArray(), dataset, sel);
+        xrefsAlignment.alignAs(alignment);
+      }
+
+      /*
+       * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset
+       * sequences). If we are DNA, drop introns and update mappings
+       */
+      AlignmentI copyAlignment = null;
+
+      if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
+      {
+        boolean copyAlignmentIsAligned = false;
+        if (dna)
+        {
+          copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
+                  xrefsAlignment.getSequencesArray());
+          if (copyAlignment.getHeight() == 0)
+          {
+            // NOTE(review): AlignFrame.showProductsFor starts this runnable
+            // on a plain Thread, so this dialog is shown off the Swing event
+            // thread - confirm that is acceptable.
+            // The last argument is the message type: ERROR_MESSAGE has the
+            // same value as the OK_OPTION constant previously passed here.
+            JOptionPane.showMessageDialog(alignFrame,
+                    MessageManager.getString("label.cant_map_cds"),
+                    MessageManager.getString("label.operation_failed"),
+                    JOptionPane.ERROR_MESSAGE);
+            System.err.println("Failed to make CDS alignment");
+          }
+
+          /*
+           * pending getting Embl transcripts to 'align',
+           * we are only doing this for Ensembl
+           */
+          // TODO proper criteria for 'can align as cdna'
+          if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
+                  || AlignmentUtils.looksLikeEnsembl(alignment))
+          {
+            copyAlignment.alignAs(alignment);
+            copyAlignmentIsAligned = true;
+          }
+        }
+        else
+        {
+          copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
+                  xrefs.getSequencesArray(), dataset);
+        }
+        copyAlignment
+                .setGapCharacter(alignFrame.viewport.getGapCharacter());
+
+        StructureSelectionManager ssm = StructureSelectionManager
+                .getStructureSelectionManager(Desktop.instance);
+
+        /*
+         * register any new mappings for sequence mouseover etc
+         * (will not duplicate any previously registered mappings)
+         */
+        ssm.registerMappings(dataset.getCodonFrames());
+
+        if (copyAlignment.getHeight() <= 0)
+        {
+          // nothing to put in the second half of the split frame: give up
+          System.err.println("No Sequences generated for xRef type "
+                  + source);
+          return;
+        }
+        /*
+         * align protein to dna
+         */
+        if (dna && copyAlignmentIsAligned)
+        {
+          xrefsAlignment.alignAs(copyAlignment);
+        }
+        else
+        {
+          /*
+           * align cdna to protein - currently only if
+           * fetching and aligning Ensembl transcripts!
+           */
+          // TODO: generalise for other sources of locus/transcript/cds data
+          if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
+          {
+            copyAlignment.alignAs(xrefsAlignment);
+          }
+        }
+      }
+      /*
+       * build AlignFrame(s) according to available alignment data
+       */
+      AlignFrame newFrame = new AlignFrame(xrefsAlignment,
+              AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
+      if (Cache.getDefault("HIDE_INTRONS", true))
+      {
+        newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
+      }
+      String newtitle = String.format("%s %s %s",
+              dna ? MessageManager.getString("label.proteins")
+                      : MessageManager.getString("label.nucleotides"),
+              MessageManager.getString("label.for"), alignFrame.getTitle());
+      newFrame.setTitle(newtitle);
+
+      if (copyAlignment == null)
+      {
+        /*
+         * split frame display is turned off in preferences file
+         */
+        Desktop.addInternalFrame(newFrame, newtitle,
+                AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
+        xrefViews.add(newFrame.alignPanel);
+        return; // via finally clause
+      }
+      AlignFrame copyThis = new AlignFrame(copyAlignment,
+              AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
+      copyThis.setTitle(alignFrame.getTitle());
+
+      // both halves inherit the feature display settings of the source frame
+      boolean showSequenceFeatures = alignFrame.getViewport()
+              .isShowSequenceFeatures();
+      newFrame.setShowSeqFeatures(showSequenceFeatures);
+      copyThis.setShowSeqFeatures(showSequenceFeatures);
+      FeatureRenderer myFeatureStyling = alignFrame.alignPanel
+              .getSeqPanel().seqCanvas.getFeatureRenderer();
+
+      /*
+       * copy feature rendering settings to split frame
+       */
+      newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
+              .transferSettings(myFeatureStyling);
+      copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer()
+              .transferSettings(myFeatureStyling);
+
+      /*
+       * apply 'database source' feature configuration
+       * if any was found
+       */
+      // TODO is this the feature colouring for the original
+      // alignment or the fetched xrefs? either could be Ensembl
+      newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
+      copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
+
+      // first argument is the copy of the selection when it is nucleotide,
+      // otherwise the newly fetched cross-references
+      SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
+              dna ? newFrame : copyThis);
+      newFrame.setVisible(true);
+      copyThis.setVisible(true);
+      String linkedTitle = MessageManager
+              .getString("label.linked_view_title");
+      Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
+      sf.adjustDivider();
+
+      // finally add the top, then bottom frame to the view list
+      xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel);
+      xrefViews.add(!dna ? copyThis.alignPanel : newFrame.alignPanel);
+
+    } catch (OutOfMemoryError e)
+    {
+      new OOMWarning("whilst fetching crossreferences", e);
+    } catch (Throwable e)
+    {
+      Cache.log.error("Error when finding crossreferences", e);
+    } finally
+    {
+      // always runs, including on the early returns above
+      alignFrame.setProgressBar(MessageManager.formatMessage(
+              "status.finished_searching_for_sequences_from",
+              new Object[] { source }), sttime);
+    }
+  }
+
+ /**
+ * Makes an alignment containing the given sequences, and adds them to the
+ * given dataset, which is also set as the dataset for the new alignment
+ *
+ * TODO: refactor to DatasetI method
+ *
+ * @param dataset
+ * @param seqs
+ * @return
+ */
+ protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
+ AlignmentI seqs)
+ {
+ SequenceI[] sprods = new SequenceI[seqs.getHeight()];
+ for (int s = 0; s < sprods.length; s++)
+ {
+ sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
+ if (dataset.getSequences() == null
+ || !dataset.getSequences().contains(
+ sprods[s].getDatasetSequence()))
+ {
+ dataset.addSequence(sprods[s].getDatasetSequence());
+ }
+ sprods[s].updatePDBIds();
+ }
+ Alignment al = new Alignment(sprods);
+ al.setDataset(dataset);
+ return al;
+ }
+
+  /**
+   * Creates an action which, when run, looks for cross-references from the
+   * given source for the selected sequences of the given alignment frame.
+   * 
+   * @param alignFrame
+   *          the frame whose alignment holds the selected sequences
+   * @param sel
+   *          the sequences to find cross-references for
+   * @param _odna
+   *          whether the caller believes the alignment is nucleotide; only
+   *          used to report a conflict if the alignment says otherwise
+   * @param source
+   *          the database source to find cross-references in
+   */
+  public CrossRefAction(AlignFrame alignFrame, SequenceI[] sel,
+          boolean _odna, String source)
+  {
+    this.source = source;
+    this._odna = _odna;
+    this.sel = sel;
+    this.alignFrame = alignFrame;
+  }
+
+  /**
+   * Factory method: returns a new, not yet started, action that will find and
+   * show cross-references for the given sequences. The caller decides how to
+   * run it (it is a Runnable).
+   * 
+   * @param sel
+   *          the sequences to find cross-references for
+   * @param _odna
+   *          whether the caller believes the alignment is nucleotide
+   * @param source
+   *          the database source to find cross-references in
+   * @param alignFrame
+   *          the frame whose alignment holds the selected sequences
+   * @return the new action
+   */
+  public static CrossRefAction showProductsFor(final SequenceI[] sel,
+          final boolean _odna, final String source,
+          final AlignFrame alignFrame)
+  {
+    CrossRefAction action = new CrossRefAction(alignFrame, sel, _odna,
+            source);
+    return action;
+  }
+
+}
public jalview.datamodel.Mapping mp = _jmap;
@Override
+      public boolean isResolvable()
+      {
+        // only resolvable once the mapping's 'to' sequence has been set, in
+        // addition to whatever the superclass requires
+        return super.isResolvable() && mp.getTo() != null;
+      }
+
+ @Override
boolean resolve()
{
SequenceI seq = getSrefDatasetSeq();
JSeq jseq;
Set<String> calcIdSet = new HashSet<String>();
-
+ // record the set of vamsas sequence XML POJO we create.
+ HashMap<String,Sequence> vamsasSetIds = new HashMap<String,Sequence>();
// SAVE SEQUENCES
for (final SequenceI jds : rjal.getSequences())
{
final SequenceI jdatasq = jds.getDatasetSequence() == null ? jds
: jds.getDatasetSequence();
String id = seqHash(jds);
-
- if (seqRefIds.get(id) != null)
- {
- // This happens for two reasons: 1. multiple views are being serialised.
- // 2. the hashCode has collided with another sequence's code. This DOES
- // HAPPEN! (PF00072.15.stk does this)
- // JBPNote: Uncomment to debug writing out of files that do not read
- // back in due to ArrayOutOfBoundExceptions.
- // System.err.println("vamsasSeq backref: "+id+"");
- // System.err.println(jds.getName()+"
- // "+jds.getStart()+"-"+jds.getEnd()+" "+jds.getSequenceAsString());
- // System.err.println("Hashcode: "+seqHash(jds));
- // SequenceI rsq = (SequenceI) seqRefIds.get(id + "");
- // System.err.println(rsq.getName()+"
- // "+rsq.getStart()+"-"+rsq.getEnd()+" "+rsq.getSequenceAsString());
- // System.err.println("Hashcode: "+seqHash(rsq));
- }
- else
- {
- vamsasSeq = createVamsasSequence(id, jds);
- vamsasSet.addSequence(vamsasSeq);
- seqRefIds.put(id, jds);
+ if (vamsasSetIds.get(id) == null)
+ {
+ if (seqRefIds.get(id) != null && !storeDS)
+ {
+ // This happens for two reasons: 1. multiple views are being
+ // serialised.
+ // 2. the hashCode has collided with another sequence's code. This
+ // DOES
+ // HAPPEN! (PF00072.15.stk does this)
+ // JBPNote: Uncomment to debug writing out of files that do not read
+ // back in due to ArrayOutOfBoundExceptions.
+ // System.err.println("vamsasSeq backref: "+id+"");
+ // System.err.println(jds.getName()+"
+ // "+jds.getStart()+"-"+jds.getEnd()+" "+jds.getSequenceAsString());
+ // System.err.println("Hashcode: "+seqHash(jds));
+ // SequenceI rsq = (SequenceI) seqRefIds.get(id + "");
+ // System.err.println(rsq.getName()+"
+ // "+rsq.getStart()+"-"+rsq.getEnd()+" "+rsq.getSequenceAsString());
+ // System.err.println("Hashcode: "+seqHash(rsq));
+ }
+ else
+ {
+ vamsasSeq = createVamsasSequence(id, jds);
+ vamsasSet.addSequence(vamsasSeq);
+ vamsasSetIds.put(id, vamsasSeq);
+ seqRefIds.put(id, jds);
+ }
}
-
jseq = new JSeq();
jseq.setStart(jds.getStart());
jseq.setEnd(jds.getEnd());
{
System.err
.println("Warning JAL-2154 regression: updating start/end for sequence "
- + tmpSeq.toString());
+ + tmpSeq.toString() + " to " + jseqs[i]);
}
} else {
incompleteSeqs.remove(seqId);
}
+ if (vamsasSeq.length > vi && vamsasSeq[vi].getId().equals(seqId))
+ {
+ // most likely we are reading a dataset XML document so
+ // update from vamsasSeq section of XML for this sequence
+ tmpSeq.setName(vamsasSeq[vi].getName());
+ tmpSeq.setDescription(vamsasSeq[vi].getDescription());
+ tmpSeq.setSequence(vamsasSeq[vi].getSequence());
+ vi++;
+ }
+ else
+ {
+ // reading multiple views, so vamsasSeq set is a subset of JSeq
+ multipleView = true;
+ }
tmpSeq.setStart(jseqs[i].getStart());
tmpSeq.setEnd(jseqs[i].getEnd());
tmpseqs.add(tmpSeq);
- multipleView = true;
}
else
{
{
// load sequence features, database references and any associated PDB
// structures for the alignment
+ //
+ // prior to 2.10, this part would only be executed the first time a
+ // sequence was encountered, but not afterwards.
+ // now, for 2.10 projects, this is also done if the xml doc includes
+ // dataset sequences not actually present in any particular view.
+ //
for (int i = 0; i < vamsasSeq.length; i++)
{
if (jseqs[i].getFeaturesCount() > 0)
}
}
-
- al.getSequenceAt(i).getDatasetSequence().addSequenceFeature(sf);
+ // adds feature to datasequence's feature set (since Jalview 2.10)
+ al.getSequenceAt(i).addSequenceFeature(sf);
}
}
if (vamsasSeq[i].getDBRefCount() > 0)
{
- addDBRefs(al.getSequenceAt(i).getDatasetSequence(), vamsasSeq[i]);
+ // adds dbrefs to datasequence's set (since Jalview 2.10)
+ addDBRefs(
+ al.getSequenceAt(i).getDatasetSequence() == null ? al.getSequenceAt(i)
+ : al.getSequenceAt(i).getDatasetSequence(),
+ vamsasSeq[i]);
}
if (jseqs[i].getPdbidsCount() > 0)
{
}
StructureSelectionManager.getStructureSelectionManager(
Desktop.instance).registerPDBEntry(entry);
- al.getSequenceAt(i).getDatasetSequence().addPDBId(entry);
+ // adds PDBEntry to datasequence's set (since Jalview 2.10)
+ if (al.getSequenceAt(i).getDatasetSequence() != null)
+ {
+ al.getSequenceAt(i).getDatasetSequence().addPDBId(entry);
+ }
+ else
+ {
+ al.getSequenceAt(i).addPDBId(entry);
+ }
}
}
}
if (maps[m].getMapping() != null)
{
mapping = addMapping(maps[m].getMapping());
- }
- if (dnaseq != null && mapping.getTo() != null)
- {
- cf.addMap(dnaseq, mapping.getTo(), mapping.getMap());
- }
- else
- {
- // defer to later
- frefedSequence.add(newAlcodMapRef(maps[m].getDnasq(), cf,
- mapping));
+ if (dnaseq != null && mapping.getTo() != null)
+ {
+ cf.addMap(dnaseq, mapping.getTo(), mapping.getMap());
+ }
+ else
+ {
+ // defer to later
+ frefedSequence.add(newAlcodMapRef(maps[m].getDnasq(), cf,
+ mapping));
+ }
}
}
al.addCodonFrame(cf);
for (int i = 0, iSize = vamsasSet.getSequenceCount(); i < iSize; i++)
{
Sequence vamsasSeq = vamsasSet.getSequence(i);
- ensureJalviewDatasetSequence(vamsasSeq, ds, dseqs, ignoreUnrefed);
+ ensureJalviewDatasetSequence(vamsasSeq, ds, dseqs, ignoreUnrefed, i);
}
// create a new dataset
if (ds == null)
* dataset alignment
* @param dseqs
* vector to add new dataset sequence to
+ * @param ignoreUnrefed
+   *          - when true, don't create new sequences from vamsasSeq if its id
+   *          doesn't already have an associated Jalview sequence.
+ * @param vseqpos
+ * - used to reorder the sequence in the alignment according to the
+ * vamsasSeq array ordering, to preserve ordering of dataset
*/
private void ensureJalviewDatasetSequence(Sequence vamsasSeq,
- AlignmentI ds, Vector dseqs, boolean ignoreUnrefed)
+ AlignmentI ds, Vector dseqs, boolean ignoreUnrefed, int vseqpos)
{
// JBP TODO: Check this is called for AlCodonFrames to support recovery of
// xRef Codon Maps
SequenceI sq = seqRefIds.get(vamsasSeq.getId());
+ boolean reorder = false;
SequenceI dsq = null;
if (sq != null && sq.getDatasetSequence() != null)
{
dsq = sq.getDatasetSequence();
}
+ else
+ {
+ reorder = true;
+ }
if (sq == null && ignoreUnrefed)
{
return;
// + (post ? "appended" : ""));
}
}
+ else
+ {
+ // sequence refs are identical. We may need to update the existing dataset
+ // alignment with this one, though.
+ if (ds != null && dseqs == null)
+ {
+ int opos = ds.findIndex(dsq);
+ SequenceI tseq = null;
+ if (opos != -1 && vseqpos != opos)
+ {
+ // remove from old position
+ ds.deleteSequence(dsq);
+ }
+ if (vseqpos < ds.getHeight())
+ {
+ if (vseqpos != opos)
+ {
+ // save sequence at destination position
+ tseq = ds.getSequenceAt(vseqpos);
+ ds.replaceSequenceAt(vseqpos, dsq);
+ ds.addSequence(tseq);
+ }
+ }
+ else
+ {
+ ds.addSequence(dsq);
+ }
+ }
+ }
}
/*
import java.awt.event.KeyEvent;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.Iterator;
import java.util.List;
private IProgressIndicator progressIndicator;
+ private volatile boolean _isConstructing = false;
+
+ private List<AlignFrame> newAlframes = null;
+
public SequenceFetcher(IProgressIndicator guiIndic)
{
+ this(guiIndic, null, null);
+ }
+
+ public SequenceFetcher(IProgressIndicator guiIndic,
+ final String selectedDb, final String queryString)
+ {
+ this._isConstructing=true;
this.progressIndicator = guiIndic;
final SequenceFetcher us = this;
// launch initialiser thread
{
if (getSequenceFetcherSingleton(progressIndicator) != null)
{
- us.initGui(progressIndicator);
+ us.initGui(progressIndicator, selectedDb, queryString);
+ us._isConstructing=false;
}
else
{
});
sf.start();
}
+  /**
+   * Blocking call which creates a new sequence fetcher panel, configures it
+   * and presses the OK button with the given database and query. The call
+   * polls until the fetcher's initialiser thread reports that construction
+   * has finished, and then runs the fetch on the calling thread.
+   * 
+   * @param database
+   *          name of the database to select in the fetcher
+   * @param query
+   *          the query string to enter in the fetcher
+   * @return the alignment frames recorded while the fetch ran, or an empty
+   *         list if the calling thread was interrupted while waiting for the
+   *         fetcher to be constructed
+   */
+  public static List<AlignFrame> fetchAndShow(String database, String query)
+  {
+    final SequenceFetcher sf = new SequenceFetcher(Desktop.instance,
+            database, query);
+    while (sf._isConstructing)
+    {
+      try
+      {
+        Thread.sleep(50);
+      } catch (InterruptedException e)
+      {
+        // give up, but restore the interrupt status so that the caller can
+        // still see that the thread was interrupted
+        Thread.currentThread().interrupt();
+        return Collections.emptyList();
+      }
+    }
+    // frames opened from now on are collected in this list
+    sf.newAlframes = new ArrayList<AlignFrame>();
+    sf.run();
+    return sf.newAlframes;
+  }
private class DatabaseAuthority extends DefaultMutableTreeNode
{
{
};
+
+  /**
+   * Pre-selects a database and fills in the query text for this fetcher
+   * panel. The first source returned for selectedDb that has tier 0 is
+   * selected. Nothing is thrown on failure: a message is written to stderr
+   * and false is returned.
+   * 
+   * @param selectedDb
+   *          - string that should correspond to a sequence fetcher
+   * @param queryString
+   *          - string that will be entered in the query dialog
+   * @return true if UI was configured with valid database and query string
+   */
+  protected boolean setInitialQuery(String selectedDb, String queryString)
+  {
+    boolean noDatabaseGiven = selectedDb == null
+            || selectedDb.trim().length() == 0;
+    if (noDatabaseGiven)
+    {
+      return false;
+    }
+    try
+    {
+      for (DbSourceProxy candidate : sfetch.getSourceProxy(selectedDb))
+      {
+        if (candidate.getTier() == 0)
+        {
+          // take the first tier 0 source and stop looking
+          database.selection = Arrays.asList(candidate);
+          break;
+        }
+      }
+      boolean haveSelection = database.selection != null
+              && database.selection.size() != 0;
+      if (!haveSelection)
+      {
+        System.err.println("Ignoring fetch parameter db='" + selectedDb
+                + "'");
+        return false;
+      }
+      textArea.setText(queryString);
+      return true;
+    } catch (Exception q)
+    {
+      System.err.println("Ignoring fetch parameter db='" + selectedDb
+              + "' and query='" + queryString + "'");
+      return false;
+    }
+  }
/**
* called by thread spawned by constructor
*
* @param guiWindow
+ * @param queryString
+ * @param selectedDb
*/
- private void initGui(IProgressIndicator guiWindow)
+ private void initGui(IProgressIndicator guiWindow, String selectedDb,
+ String queryString)
{
this.guiWindow = guiWindow;
if (guiWindow instanceof AlignFrame)
try
{
jbInit();
+ /*
+ * configure the UI with any query parameters we were called with
+ */
+ if (!setInitialQuery(selectedDb, queryString))
+ {
+ /*
+ * none provided, so show the database chooser
+ */
+ database.waitForInput();
+ }
} catch (Exception ex)
{
ex.printStackTrace();
this.add(jPanel3, java.awt.BorderLayout.CENTER);
this.add(jPanel2, java.awt.BorderLayout.NORTH);
jScrollPane1.getViewport().add(textArea);
-
- /*
- * open the database tree
- */
- database.waitForInput();
}
private void pdbSourceAction()
{
af.hideFeatureColumns(SequenceOntologyI.EXON, false);
}
-
+ if (newAlframes != null)
+ {
+ newAlframes.add(af);
+ }
Desktop.addInternalFrame(af, title, AlignFrame.DEFAULT_WIDTH,
AlignFrame.DEFAULT_HEIGHT);
ArrayList<SequenceI> seqsWithoutSourceDBRef = new ArrayList<SequenceI>();
for (SequenceI seq : sequences)
{
- if (seq.getSourceDBRef() == null && seq.getDBRefs() == null)
+ if (seq.getPrimaryDBRefs().size() == 0)
{
seqsWithoutSourceDBRef.add(seq);
continue;
DBRefEntry sourceDBRef = new DBRefEntry();
sourceDBRef.setAccessionId(getId());
sourceDBRef.setSource(DBRefSource.PDB);
- pdbSequence.setSourceDBRef(sourceDBRef);
+ // TODO: specify version for 'PDB' database ref if it is read from a file.
+ // TODO: decide if jalview.io should be creating primary refs!
+ sourceDBRef.setVersion("");
pdbSequence.addPDBId(entry);
pdbSequence.addDBRef(sourceDBRef);
SequenceI chainseq = pdbSequence;
}
ArrayList<StructureMapping> seqToStrucMapping = new ArrayList<StructureMapping>();
- if (isMapUsingSIFTs)
+ if (isMapUsingSIFTs && seq.isProtein())
{
setProgressBar(null);
setProgressBar(MessageManager
return "cif".equalsIgnoreCase(fileExt);
}
+ /**
+ * retrieve a mapping for seq from SIFTs using associated DBRefEntry for
+ * uniprot or PDB
+ *
+ * @param seq
+ * @param pdbFile
+ * @param targetChainId
+ * @param pdb
+ * @param maxChain
+ * @param sqmpping
+ * @param maxAlignseq
+ * @return
+ * @throws SiftsException
+ */
private StructureMapping getStructureMapping(SequenceI seq,
String pdbFile, String targetChainId, StructureFile pdb,
PDBChain maxChain, jalview.datamodel.Mapping sqmpping,
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import com.stevesoft.pat.Regex;
canonicalSourceNameLookup.put("pdb", DBRefSource.PDB);
canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL);
+ // Ensembl Gn and Tr are for Ensembl genomic and transcript IDs as served
+ // from ENA.
+ canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL);
+ canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL);
+
+ // Make sure we have lowercase entries for all canonical string lookups
+ Set<String> keys = canonicalSourceNameLookup.keySet();
+ for (String k : keys)
+ {
+ canonicalSourceNameLookup.put(k.toLowerCase(),
+ canonicalSourceNameLookup.get(k));
+ }
dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB);
dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT);
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
if (refa.getSource() == null
- || refb.getSource().equals(refa.getSource()))
+ || DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
if (refa.getVersion() == null
|| refb.getVersion().equals(refa.getVersion()))
@Override
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
- if (nullOrEqual(refa.getSource(), refb.getSource())
+ if (nullOrEqualSource(refa.getSource(), refb.getSource())
&& nullOrEqual(refa.getVersion(), refb.getVersion())
&& nullOrEqual(refa.getAccessionId(), refb.getAccessionId())
&& nullOrEqual(refa.getMap(), refb.getMap()))
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
if (refa.getSource() != null && refb.getSource() != null
- && refb.getSource().equals(refa.getSource()))
+ && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
if (refa.getAccessionId() != null && refb.getAccessionId() != null
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
if (refa.getSource() != null && refb.getSource() != null
- && refb.getSource().equals(refa.getSource()))
+ && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
if (refa.getAccessionId() != null && refb.getAccessionId() != null
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
if (refa.getSource() != null && refb.getSource() != null
- && refb.getSource().equals(refa.getSource()))
+ && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
// if ((refa.getVersion()==null || refb.getVersion()==null)
public boolean matches(DBRefEntry refa, DBRefEntry refb)
{
if (refa.getSource() != null && refb.getSource() != null
- && refb.getSource().equals(refa.getSource()))
+ && DBRefUtils.getCanonicalName(refb.getSource()).equals(
+ DBRefUtils.getCanonicalName(refa.getSource())))
{
// We dont care about version
{
return true;
}
- return (o1 == null ? o2.equals(o1) : o1.equals(o2));
+ return o1.equals(o2);
+ }
+
+ /**
+  * Compares two database source names after passing each through
+  * DBRefUtils.getCanonicalName. A null argument is treated as a wildcard.
+  *
+  * @param o1
+  *          - null or source string to compare
+  * @param o2
+  *          - null or source string to compare
+  * @return true if either o1 or o2 is null, or if the canonical name of o1
+  *         equals the canonical name of o2
+  */
+ public static boolean nullOrEqualSource(String o1, String o2)
+ {
+   if (o1 != null && o2 != null)
+   {
+     // both sources given: compare their canonical forms
+     return DBRefUtils.getCanonicalName(o1).equals(
+             DBRefUtils.getCanonicalName(o2));
+   }
+   // at least one side is the null wildcard
+   return true;
}
/**
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.util;
+
+import java.util.AbstractSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+
+/**
+ * Order preserving Set based on System.identityHashCode() for an object, which
+ * also supports Object->index lookup. add() and indexOf() distinguish elements
+ * by reference identity (==) rather than by equals(), and each element records
+ * the size of the set at the moment it was first added.
+ *
+ * @author Jim Procter (2016) based on Evgeniy Dorofeev's response: via
+ *         https://stackoverflow.com/questions/17276658/linkedidentityhashset
+ *
+ */
+public class LinkedIdentityHashSet<E> extends AbstractSet<E>
+{
+  LinkedHashMap<IdentityWrapper, IdentityWrapper> set = new LinkedHashMap<IdentityWrapper, IdentityWrapper>();
+
+  /**
+   * Map key that wraps an element so that hashing and equality are based on
+   * the element's reference, and which stores the element's position.
+   */
+  static class IdentityWrapper
+  {
+    Object obj;
+
+    public int p;
+
+    IdentityWrapper(Object obj, int p)
+    {
+      this.obj = obj;
+      this.p = p;
+    }
+
+    /**
+     * Two wrappers are equal when they wrap the same object reference. The
+     * stored position takes no part in the comparison, so a probe wrapper
+     * built for a lookup matches the wrapper stored when the element was
+     * added.
+     */
+    @Override
+    public boolean equals(Object other)
+    {
+      // the map only ever compares wrappers with wrappers; comparing the
+      // wrapped object with the other wrapper itself could never be true
+      return other instanceof IdentityWrapper
+              && ((IdentityWrapper) other).obj == this.obj;
+    }
+
+    @Override
+    public int hashCode()
+    {
+      return System.identityHashCode(obj);
+    }
+  }
+
+  /**
+   * Adds e unless the very same object reference is already present.
+   *
+   * @param e
+   *          element to add
+   * @return true if e was added, false if it was already in the set
+   */
+  @Override
+  public boolean add(E e)
+  {
+    // the position recorded is the set's size before insertion
+    IdentityWrapper el = (new IdentityWrapper(e, set.size()));
+    return set.putIfAbsent(el, el) == null;
+  }
+
+  /**
+   * Iterates over the elements in the order they were added. The returned
+   * iterator does not implement remove().
+   */
+  @Override
+  public Iterator<E> iterator()
+  {
+    return new Iterator<E>()
+    {
+      final Iterator<IdentityWrapper> se = set.keySet().iterator();
+
+      @Override
+      public boolean hasNext()
+      {
+        return se.hasNext();
+      }
+
+      @SuppressWarnings("unchecked")
+      @Override
+      public E next()
+      {
+        // only instances of E are ever wrapped by add()
+        return (E) se.next().obj;
+      }
+    };
+  }
+
+  @Override
+  public int size()
+  {
+    return set.size();
+  }
+
+  /**
+   * Lookup the index for e in the set
+   *
+   * @param e
+   * @return position of e in the set when it was added, or -1 if this exact
+   *         object reference was never added.
+   */
+  public int indexOf(E e)
+  {
+    // the map is keyed on wrappers, so look up with a probe wrapper; the
+    // probe's position value is ignored by equals/hashCode
+    IdentityWrapper found = set.get(new IdentityWrapper(e, -1));
+    return found == null ? -1 : found.p;
+  }
+}
return forwardStrand;
}
+ /**
+  * Tests whether this map pairs a ratio of three on one side with a ratio of
+  * one on the other.
+  *
+  * @return true if from, or to is a three to 1 mapping
+  */
+ public boolean isTripletMap()
+ {
+   boolean toIsTriplet = (toRatio == 3) && (fromRatio == 1);
+   boolean fromIsTriplet = (fromRatio == 3) && (toRatio == 1);
+   return toIsTriplet || fromIsTriplet;
+ }
+
}
srces.addAll(srcesfordb);
}
}
+ // append the PDB data source, since it is 'special', catering for both
+ // nucleotide and protein
+ srces.addAll(sfetcher.getSourceProxy(DBRefSource.PDB));
// append the selected sequence sources to the default dbs
srces.addAll(selsources);
final int sequenceStart = sequence.getStart();
if (absStart == -1)
{
- // Is local sequence contained in dataset sequence?
+ // couldn't find local sequence in sequence from database, so check if
+ // the database sequence is a subsequence of local sequence
absStart = nonGapped.indexOf(entrySeq);
if (absStart == -1)
- { // verification failed.
+ {
+ // verification failed. couldn't find any relationship between
+ // entrySeq and local sequence
messages.append(sequence.getName()
+ " SEQUENCE NOT %100 MATCH \n");
continue;
}
+ /*
+ * found match for the whole of the database sequence within the local
+ * sequence's reference frame.
+ */
transferred = true;
sbuffer.append(sequence.getName() + " HAS " + absStart
+ " PREFIXED RESIDUES COMPARED TO " + dbSource + "\n");
- //
- // + " - ANY SEQUENCE FEATURES"
- // + " HAVE BEEN ADJUSTED ACCORDINGLY \n");
- // absStart = 0;
- // create valid mapping between matching region of local sequence and
- // the mapped sequence
+
+ /*
+ * So create a mapping to the external entry from the matching region of
+ * the local sequence, and leave local start/end untouched.
+ */
mp = new Mapping(null, new int[] { sequenceStart + absStart,
sequenceStart + absStart + entrySeq.length() - 1 }, new int[]
{ entry.getStart(), entry.getStart() + entrySeq.length() - 1 },
1, 1);
- updateRefFrame = false; // mapping is based on current start/end so
- // don't modify start and end
+ updateRefFrame = false;
}
else
{
+ /*
+ * found a match for the local sequence within sequence from
+ * the external database
+ */
transferred = true;
+
// update start and end of local sequence to place it in entry's
// reference frame.
// apply identity map map from whole of local sequence to matching
// absStart+sequence.getStart()+entrySeq.length()-1},
// new int[] { entry.getStart(), entry.getEnd() }, 1, 1);
// relocate local features for updated start
+
if (updateRefFrame)
{
if (sequence.getSequenceFeatures() != null)
{
+ /*
+ * relocate existing sequence features by offset
+ */
SequenceFeature[] sf = sequence.getSequenceFeatures();
int start = sequenceStart;
int end = sequence.getEnd();
System.out.println("Adding dbrefs to " + sequence.getName()
+ " from " + dbSource + " sequence : " + entry.getName());
sequence.transferAnnotation(entry, mp);
- // unknownSequences.remove(sequence);
+
absStart += entry.getStart();
int absEnd = absStart + nonGapped.length() - 1;
if (!trimDatasetSeqs)
{
DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
accessionId);
+
+ // mark dbRef as a primary reference for this sequence
dbRefs.add(dbRef);
}
- sequence.setSourceDBRef((dbRefs != null && dbRefs.size() > 0) ? dbRefs
- .get(0) : null);
Vector<PDBEntry> onlyPdbEntries = new Vector<PDBEntry>();
for (PDBEntry pdb : entry.getDbReference())
public DBRefEntryI getValidSourceDBRef(SequenceI seq)
throws SiftsException
{
- DBRefEntryI sourceDBRef = null;
- sourceDBRef = seq.getSourceDBRef();
- if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
+ // the source reference is chosen from the sequence's own DBRefs; with
+ // none available there is nothing to choose from
+ DBRefEntry[] dbRefs = seq.getDBRefs();
+ if (dbRefs == null || dbRefs.length < 1)
{
- return sourceDBRef;
+ throw new SiftsException(
+ "Source DBRef could not be determined. DBRefs might not have been retrieved.");
}
- else
+
+ for (DBRefEntryI dbRef : dbRefs)
{
- DBRefEntry[] dbRefs = seq.getDBRefs();
- if (dbRefs == null || dbRefs.length < 1)
+ // skip entries lacking an accession or a source
+ if (dbRef == null || dbRef.getAccessionId() == null
+ || dbRef.getSource() == null)
{
- throw new SiftsException(
- "Source DBRef could not be determined. DBRefs might not have been retrieved.");
+ continue;
}
-
- for (DBRefEntryI dbRef : dbRefs)
+ // return the first valid, primary reference whose source is UNIPROT or
+ // PDB (source compared ignoring case)
+ if (isValidDBRefEntry(dbRef)
+ && dbRef.isPrimary()
+ && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef
+ .getSource().equalsIgnoreCase(DBRefSource.PDB)))
{
- if (dbRef == null || dbRef.getAccessionId() == null
- || dbRef.getSource() == null)
- {
- continue;
- }
- if (isFoundInSiftsEntry(dbRef.getAccessionId())
- && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef
- .getSource().equalsIgnoreCase(DBRefSource.PDB)))
- {
- seq.setSourceDBRef(dbRef);
- return dbRef;
- }
+ return dbRef;
}
}
- if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
- {
- return sourceDBRef;
- }
throw new SiftsException("Could not get source DB Ref");
}
String originalSeq = AlignSeq.extractGaps(
jalview.util.Comparison.GapChars, seq.getSequenceAsString());
HashMap<Integer, int[]> mapping = new HashMap<Integer, int[]>();
- DBRefEntryI sourceDBRef = seq.getSourceDBRef();
+ DBRefEntryI sourceDBRef;
sourceDBRef = getValidSourceDBRef(seq);
// TODO ensure sequence start/end is in the same coordinate system and
// consistent with the choosen sourceDBRef
/*
* need a sourceDbRef if we are to construct dbrefs to the CDS
- * sequence
+ * sequence from the dna contig sequences
*/
DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
- dna1.getDatasetSequence().setSourceDBRef(dbref);
+ dna1.getDatasetSequence().addDBRef(dbref);
+ org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
- dna2.getDatasetSequence().setSourceDBRef(dbref);
+ dna2.getDatasetSequence().addDBRef(dbref);
+ org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));
/*
* CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
* dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
*/
- MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+ MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 2 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
- acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+ acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
+ mapfordna1);
dna.addCodonFrame(acf);
- map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
+ MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
+ new int[] { 1, 3 },
3, 1);
acf = new AlignedCodonFrame();
- acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+ acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(),
+ mapfordna2);
dna.addCodonFrame(acf);
/*
+ * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. These are normally constructed from CDS annotation
+ */
+ DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
+ new Mapping(mapfordna1));
+ dna1.getDatasetSequence().addDBRef(dna1xref);
+ DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
+ new Mapping(mapfordna2));
+ dna2.getDatasetSequence().addDBRef(dna2xref);
+
+ /*
* execute method under test:
*/
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
* verify CDS has a dbref with mapping to peptide
*/
assertNotNull(cds1Dss.getDBRefs());
- assertEquals(1, cds1Dss.getDBRefs().length);
+ assertEquals(2, cds1Dss.getDBRefs().length);
dbref = cds1Dss.getDBRefs()[0];
- assertEquals("UNIPROT", dbref.getSource());
- assertEquals("0", dbref.getVersion());
- assertEquals("pep1", dbref.getAccessionId());
+ assertEquals(dna1xref.getSource(), dbref.getSource());
+ // version is via ensembl's primary ref
+ assertEquals(dna1xref.getVersion(), dbref.getVersion());
+ assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId());
assertNotNull(dbref.getMap());
assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
MapList cdsMapping = new MapList(new int[] { 1, 6 },
* verify peptide has added a dbref with reverse mapping to CDS
*/
assertNotNull(pep1.getDBRefs());
+ // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?
assertEquals(2, pep1.getDBRefs().length);
dbref = pep1.getDBRefs()[1];
assertEquals("ENSEMBL", dbref.getSource());
* Just the protein refs:
*/
found = DBRefUtils.selectDbRefs(false, refs);
- assertEquals(5, found.length);
+ assertEquals(4, found.length);
assertSame(ref1, found[0]);
assertSame(ref2, found[1]);
- assertSame(ref3, found[2]);
- assertSame(ref4, found[3]);
- assertSame(ref9, found[4]);
+ assertSame(ref4, found[2]);
+ assertSame(ref9, found[3]);
}
/**
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
import jalview.io.AppletFormatAdapter;
import jalview.io.FormatAdapter;
import jalview.util.MapList;
import java.util.Iterator;
import java.util.List;
+import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
return a;
}
+ /**
+  * assert wrapper: runs verifyAlignmentDatasetRefs on the given alignment
+  * with assertion raising enabled and no message prefix
+  *
+  * @param alignment
+  *          - the alignment (or dataset) to check
+  */
+ public static void assertAlignmentDatasetRefs(AlignmentI alignment)
+ {
+   final String noPrefix = null;
+   verifyAlignmentDatasetRefs(alignment, true, noPrefix);
+ }
+
+ /**
+  * assert wrapper: runs verifyAlignmentDatasetRefs on the given alignment
+  * with assertion raising enabled
+  *
+  * @param alignment
+  *          - the alignment (or dataset) to check
+  * @param message
+  *          - prefixed to any assert failed messages
+  */
+ public static void assertAlignmentDatasetRefs(AlignmentI alignment,
+         String message)
+ {
+   final boolean raiseAssert = true;
+   verifyAlignmentDatasetRefs(alignment, raiseAssert, message);
+ }
+
+ /**
+  * verify sequence and dataset references are properly contained within
+  * dataset. For an alignment that has a dataset, each sequence's dataset
+  * sequence is checked and the dataset itself is then verified. For a
+  * dataset, the sequences, the sequences mapped to by their DBRefEntry maps,
+  * and both ends of every codon frame mapping are checked.
+  *
+  * @param alignment
+  *          - the alignmentI object to verify (either alignment or dataset)
+  * @param raiseAssert
+  *          - when set, testng assertions are raised.
+  * @param message
+  *          - null or a string message to prepend to the assert failed
+  *          messages.
+  * @return true if alignment references were in order, otherwise false.
+  */
+ public static boolean verifyAlignmentDatasetRefs(AlignmentI alignment,
+         boolean raiseAssert, String message)
+ {
+   if (message == null)
+   {
+     message = "";
+   }
+   if (alignment == null)
+   {
+     return verificationFailed(raiseAssert, message
+             + "Alignment for verification was null.");
+   }
+   if (alignment.getDataset() != null)
+   {
+     AlignmentI dataset = alignment.getDataset();
+     // check all alignment sequences have their dataset within the dataset
+     for (SequenceI seq : alignment.getSequences())
+     {
+       SequenceI seqds = seq.getDatasetSequence();
+       if (seqds == null)
+       {
+         // report a missing dataset sequence as a verification failure
+         // rather than letting the checks below throw NullPointerException
+         return verificationFailed(raiseAssert, message
+                 + " Alignment contained a sequence with no dataset sequence.");
+       }
+       if (seqds.getDatasetSequence() != null)
+       {
+         return verificationFailed(raiseAssert, message
+                 + " Alignment contained a sequence who's dataset sequence has a second dataset reference.");
+       }
+       if (dataset.findIndex(seqds) == -1)
+       {
+         return verificationFailed(raiseAssert, message
+                 + " Alignment contained a sequence who's dataset sequence was not in the dataset.");
+       }
+     }
+     // the alignment's own references are fine: now verify the dataset
+     return verifyAlignmentDatasetRefs(dataset, raiseAssert, message);
+   }
+
+   // no dataset attached, so the given alignment is treated as a dataset
+   int dsp = -1;
+   // verify all dataset sequences
+   for (SequenceI seqds : alignment.getSequences())
+   {
+     dsp++;
+     if (seqds.getDatasetSequence() != null)
+     {
+       return verificationFailed(raiseAssert, message
+               + " Dataset contained a sequence with non-null dataset reference (ie not a dataset sequence!)");
+     }
+     // findIndex must report this position, otherwise the same sequence
+     // also appears earlier in the dataset
+     int foundp = alignment.findIndex(seqds);
+     if (foundp != dsp)
+     {
+       return verificationFailed(raiseAssert, message
+               + " Dataset sequence array contains a reference at "
+               + dsp + " to a sequence first seen at " + foundp + " ("
+               + seqds.toString() + ")");
+     }
+     if (seqds.getDBRefs() == null)
+     {
+       continue;
+     }
+     for (DBRefEntry dbr : seqds.getDBRefs())
+     {
+       if (dbr.getMap() == null)
+       {
+         continue;
+       }
+       SequenceI seqdbrmapto = dbr.getMap().getTo();
+       if (seqdbrmapto == null)
+       {
+         // a map without a target sequence has nothing to verify
+         continue;
+       }
+       if (seqdbrmapto.getDatasetSequence() != null)
+       {
+         return verificationFailed(raiseAssert, message
+                 + " DBRefEntry for sequence in alignment had map to sequence which was not a dataset sequence");
+       }
+       if (alignment.findIndex(dbr.getMap().getTo()) == -1)
+       {
+         return verificationFailed(raiseAssert, message
+                 + " DBRefEntry for sequence in alignment had map to sequence not in dataset");
+       }
+     }
+   }
+   // finally, verify codonmappings involve only dataset sequences.
+   if (alignment.getCodonFrames() != null)
+   {
+     for (AlignedCodonFrame alc : alignment.getCodonFrames())
+     {
+       for (SequenceToSequenceMapping ssm : alc.getMappings())
+       {
+         if (ssm.getFromSeq().getDatasetSequence() != null)
+         {
+           return verificationFailed(raiseAssert, message
+                   + " CodonFrame-SSM-FromSeq is not a dataset sequence");
+         }
+         if (alignment.findIndex(ssm.getFromSeq()) == -1)
+         {
+           return verificationFailed(raiseAssert, message
+                   + " CodonFrame-SSM-FromSeq is not contained in dataset");
+         }
+         if (ssm.getMapping().getTo().getDatasetSequence() != null)
+         {
+           return verificationFailed(raiseAssert, message
+                   + " CodonFrame-SSM-Mapping-ToSeq is not a dataset sequence");
+         }
+         if (alignment.findIndex(ssm.getMapping().getTo()) == -1)
+         {
+           return verificationFailed(raiseAssert, message
+                   + " CodonFrame-SSM-Mapping-ToSeq is not contained in dataset");
+         }
+       }
+     }
+   }
+   return true; // all relationships verified!
+ }
+
+ /**
+  * Reports a verification failure: raises a testng assertion failure with
+  * the given text when raiseAssert is set, otherwise does nothing.
+  *
+  * @param raiseAssert
+  *          - when set, Assert.fail is called with failureText
+  * @param failureText
+  *          - complete text for the assertion failure
+  * @return always false, so callers can return the result directly
+  */
+ private static boolean verificationFailed(boolean raiseAssert,
+         String failureText)
+ {
+   if (raiseAssert)
+   {
+     Assert.fail(failureText);
+   }
+   return false;
+ }
+
+ /**
+  * call verifyAlignmentDatasetRefs with and without assertion raising enabled,
+  * to check expected pass/fail actually occurs in both conditions
+  *
+  * @param al
+  *          - alignment passed to verifyAlignmentDatasetRefs
+  * @param expected
+  *          - true if verification should succeed, false if it should fail
+  * @param msg
+  *          - appended to the failure text of this helper's own assertions
+  */
+ private void assertVerifyAlignment(AlignmentI al, boolean expected,
+         String msg)
+ {
+   if (!expected)
+   {
+     // with asserts enabled, an invalid alignment must raise an
+     // AssertionError
+     boolean assertRaised;
+     try
+     {
+       verifyAlignmentDatasetRefs(al, true, null);
+       assertRaised = false;
+     } catch (AssertionError ae)
+     {
+       // expected behaviour
+       assertRaised = true;
+     }
+     if (!assertRaised)
+     {
+       Assert.fail("Invalid test alignment passed when raiseAsserts enabled:"
+               + msg);
+     }
+     // with asserts disabled, validation must simply report false
+     Assert.assertFalse(verifyAlignmentDatasetRefs(al, false, null),
+             "Invalid test alignment tested true when raiseAsserts disabled:"
+                     + msg);
+     return;
+   }
+
+   // a valid alignment must verify without raising when asserts are enabled
+   try
+   {
+     boolean verified = verifyAlignmentDatasetRefs(al, true, null);
+     Assert.assertTrue(verified,
+             "Valid test alignment failed when raiseAsserts enabled:"
+                     + msg);
+   } catch (AssertionError ae)
+   {
+     ae.printStackTrace();
+     Assert.fail(
+             "Valid test alignment raised assertion errors when raiseAsserts enabled: "
+                     + msg, ae);
+   }
+   // and must also verify when asserts are disabled
+   Assert.assertTrue(verifyAlignmentDatasetRefs(al, false, null),
+           "Valid test alignment tested false when raiseAsserts disabled:"
+                   + msg);
+ }
+ @Test(groups = { "Functional" })
+ public void testVerifyAlignmentDatasetRefs()
+ {
+   SequenceI first = new Sequence("sq1", "ASFDD");
+   SequenceI second = new Sequence("sq2", "TTTTTT");
+
+   // a plain two-sequence alignment with no dataset should verify
+   Alignment al = new Alignment(new SequenceI[] { first, second });
+   assertVerifyAlignment(al, true, "Simple valid alignment didn't verify");
+
+   // giving one of its sequences a dataset sequence must be detected
+   first.setDatasetSequence(second);
+   assertVerifyAlignment(
+           al,
+           false,
+           "didn't detect dataset sequence with a dataset sequence reference.");
+
+   // and undoing that restores validity
+   first.setDatasetSequence(null);
+   assertVerifyAlignment(
+           al,
+           true,
+           "didn't reinstate validity after nulling dataset sequence dataset reference");
+
+   // repeat the check once the alignment has a dataset
+   al.createDatasetAlignment();
+   assertNotNull(al.getDataset());
+   assertVerifyAlignment(al, true,
+           "verify failed after createDatasetAlignment");
+
+   // dbref on the first dataset sequence, mapped to the second one
+   DBRefEntry firstToSecond = new DBRefEntry("UNIPROT", "1", "Q111111");
+   Mapping insideDataset = new Mapping(second.getDatasetSequence(),
+           new int[] { 1, 5 }, new int[] { 2, 6 }, 1, 1);
+   firstToSecond.setMap(insideDataset);
+   first.getDatasetSequence().addDBRef(firstToSecond);
+   assertVerifyAlignment(al, true,
+           "verify failed after addition of valid DBRefEntry/map");
+
+   // a new dataset member, plus a dbref for it that maps to a sequence
+   // which is not (yet) in the dataset
+   SequenceI outsider = new Sequence("sqout", "ututututucagcagcag");
+   SequenceI newcomer = new Sequence("sqnew", "EEERRR");
+   DBRefEntry newcomerToOutsider = new DBRefEntry("ENAFOO", "1", "R000001");
+   Mapping outsideDataset = new Mapping(outsider, new int[] { 1, 6 },
+           new int[] { 1, 18 }, 1, 3);
+   newcomerToOutsider.setMap(outsideDataset);
+   al.getDataset().addSequence(newcomer);
+   assertVerifyAlignment(al, true,
+           "verify failed after addition of new sequence to dataset");
+
+   // failure conditions start here: attach the dangling dbref
+   newcomer.addDBRef(newcomerToOutsider);
+   assertVerifyAlignment(
+           al,
+           false,
+           "verify passed when a dbref with map to sequence outside of dataset was added");
+   // adding the mapped-to sequence to the dataset makes it valid again
+   al.getDataset().addSequence(outsider);
+   assertVerifyAlignment(al, true,
+           "verify should have passed after adding dbref->to sequence in to dataset");
+
+   // same exercise for a codon frame mapping from an outside sequence
+   SequenceI codonOutsider = new Sequence("sqanotherout",
+           "aggtutaggcagcagcag");
+   MapList codonMap = new MapList(new int[] { 1, 6 },
+           new int[] { 1, 18 }, 3, 1);
+   AlignedCodonFrame frame = new AlignedCodonFrame();
+   frame.addMap(codonOutsider, newcomer, codonMap);
+
+   al.addCodonFrame(frame);
+   Assert.assertEquals(al.getDataset().getCodonFrames().size(), 1);
+
+   assertVerifyAlignment(
+           al,
+           false,
+           "verify passed when alCodonFrame mapping to sequence outside of dataset was added");
+   // adding the mapped-from sequence to the dataset makes it valid again
+   al.getDataset().addSequence(codonOutsider);
+   assertVerifyAlignment(
+           al,
+           true,
+           "verify should have passed once all sequences involved in alCodonFrame were added to dataset");
+
+   // a second copy of the same sequence must be rejected ...
+   al.getDataset().addSequence(codonOutsider);
+   assertVerifyAlignment(al, false,
+           "verify should have failed when a sequence was added twice to the dataset");
+   // ... until the duplicate entry is deleted again
+   al.getDataset().deleteSequence(codonOutsider);
+   assertVerifyAlignment(al, true,
+           "verify should have passed after duplicate entry for sequence was removed");
+ }
+
+ /**
+  * checks that the sequence data for an alignment's dataset is non-redundant.
+  * Fails if there are sequences with same id, sequence, start, and end. No
+  * prefix is added to the failure message.
+  *
+  * @param al
+  *          - alignment to verify
+  */
+ public static void assertDatasetIsNormalised(AlignmentI al)
+ {
+   final String noPrefix = null;
+   assertDatasetIsNormalised(al, noPrefix);
+ }
+
+ /**
+  * checks that the sequence data for an alignment's dataset is non-redundant.
+  * Fails if there are sequences with same id, sequence, start, and end.
+  *
+  * @param al
+  *          - alignment to verify; if it has a dataset, the dataset is
+  *          checked instead
+  * @param message
+  *          - null or message prepended to exception message.
+  */
+ public static void assertDatasetIsNormalised(AlignmentI al, String message)
+ {
+   // descend to the underlying dataset, which is what gets checked
+   AlignmentI target = al;
+   while (target.getDataset() != null)
+   {
+     target = target.getDataset();
+   }
+
+   /*
+    * compare every pair of sequences once; a pair is redundant when start,
+    * end, name and sequence all agree
+    */
+   List<SequenceI> members = target.getSequences();
+   for (int first = 0; first < members.size(); first++)
+   {
+     SequenceI pSeq = members.get(first);
+     for (int second = first + 1; second < members.size(); second++)
+     {
+       SequenceI qSeq = members.get(second);
+       boolean redundant = pSeq.getStart() == qSeq.getStart()
+               && pSeq.getEnd() == qSeq.getEnd()
+               && pSeq.getName().equals(qSeq.getName())
+               && Arrays.equals(pSeq.getSequence(), qSeq.getSequence());
+       if (redundant)
+       {
+         String prefix = (message == null ? "" : message + " :");
+         Assert.fail(prefix + "Found similar sequences at position "
+                 + first + " and " + second + "\n" + pSeq.toString());
+       }
+     }
+   }
+ }
+
+ @Test(groups = { "Functional", "Asserts" })
+ public void testAssertDatasetIsNormalised()
+ {
+   Sequence base = new Sequence("s1/1-4", "asdf");
+   Sequence shifted = new Sequence("s1/2-5", "asdf");
+   Sequence mutated = new Sequence("s1/1-4", "asdt");
+   Sequence other = new Sequence("s2/1-4", "asdf");
+   Sequence duplicate = new Sequence("s1/1-4", "asdf");
+
+   Alignment al = new Alignment(new SequenceI[] { base });
+   al.setDataset(null);
+   expectNormalised(al,
+           "Single sequence should be valid normalised dataset.");
+
+   al.addSequence(other);
+   expectNormalised(al,
+           "Two different sequences should be valid normalised dataset.");
+
+   /*
+    * look the second sequence up by its name and rename it "sq1";
+    * this should still be valid
+    */
+   al.findName(other.getName()).setName("sq1");
+   expectNormalised(
+           al,
+           "Two different sequences in dataset, but same name in alignment, should be valid normalised dataset.");
+
+   // same name and range as the first sequence, but different residues
+   al.addSequence(mutated);
+   expectNormalised(al,
+           "sq1 and sq1 with different sequence should be distinct.");
+
+   // same name and residues as the first sequence, but different range
+   al.addSequence(shifted);
+   expectNormalised(al,
+           "sq1 and sq1 with different start/end should be distinct.");
+
+   /*
+    * finally, the failure case: an exact duplicate of the first sequence
+    */
+   al.addSequence(duplicate);
+   try
+   {
+     assertDatasetIsNormalised(al);
+   } catch (AssertionError expected)
+   {
+     // the duplicate was detected, as required
+     return;
+   }
+   Assert.fail("Expected identical sequence to raise exception.");
+ }
+
+ /**
+  * runs assertDatasetIsNormalised on al and converts any AssertionError it
+  * raises into a test failure carrying the given text
+  *
+  * @param al
+  *          - alignment expected to be normalised
+  * @param failureText
+  *          - message for the failure raised if it is not
+  */
+ private static void expectNormalised(AlignmentI al, String failureText)
+ {
+   try
+   {
+     assertDatasetIsNormalised(al);
+   } catch (AssertionError ae)
+   {
+     Assert.fail(failureText);
+   }
+ }
/*
* Read in Stockholm format test data including secondary structure
* annotations.
assertTrue(ds.getCodonFrames().contains(acf));
}
+ /**
+  * tests that creating an alignment's dataset also adds the sequences that
+  * the aligned sequences refer to through their dbref mappings
+  */
+ @Test(groups = "Functional")
+ public void testCreateDatasetAlignmentWithMappedToSeqs()
+ {
+   // two gapped sequences that will form the alignment
+   SequenceI aligned1 = new Sequence("sq1", "A--SDF");
+   SequenceI aligned2 = new Sequence("sq2", "G--TRQ");
+
+   // each one cross-references a further sequence through a mapped dbref
+   DBRefEntry refToThird = new DBRefEntry("SQ1", "", "sq3");
+   SequenceI third = new Sequence("sq3", "VWANG");
+   MapList thirdMap = new MapList(new int[] { 1, 4 }, new int[] { 2, 5 },
+           1, 1);
+   refToThird.setMap(new Mapping(third, thirdMap));
+   aligned1.addDBRef(refToThird);
+
+   SequenceI fourth = new Sequence("sq4", "ERKWI");
+   DBRefEntry refToFourth = new DBRefEntry("SQ2", "", "sq4");
+   MapList fourthMap = new MapList(new int[] { 1, 4 }, new int[] { 2, 5 },
+           1, 1);
+   refToFourth.setMap(new Mapping(fourth, fourthMap));
+   aligned2.addDBRef(refToFourth);
+
+   // a 1:1 codon frame mapping between the two aligned sequences
+   // NOTE(review): this frame is never added to the alignment or its
+   // dataset in this method - confirm whether that is intended
+   AlignedCodonFrame frame = new AlignedCodonFrame();
+   frame.addMap(aligned1, aligned2, new MapList(new int[] { 1, 4 },
+           new int[] { 1, 4 }, 1, 1));
+
+   Alignment protein = new Alignment(new SequenceI[] { aligned1, aligned2 });
+
+   /*
+    * create the alignment dataset; the assertions below rely on the
+    * aligned sequences having dataset sequences afterwards
+    */
+   // TODO promote this method to AlignmentI
+   protein.createDatasetAlignment();
+
+   AlignmentI ds = protein.getDataset();
+
+   // expect 4 dataset sequences: one per aligned sequence, plus the two
+   // reached through the dbref mappings
+   assertEquals(4, ds.getHeight());
+   List<SequenceI> dsSeqs = ds.getSequences();
+   assertTrue(dsSeqs.contains(aligned1.getDatasetSequence()));
+   assertTrue(dsSeqs.contains(aligned2.getDatasetSequence()));
+   assertTrue(dsSeqs.contains(third));
+   assertTrue(dsSeqs.contains(fourth));
+   // the codon frame lookup should give equal results for both dataset
+   // sequences
+   assertEquals(ds.getCodonFrame(aligned1.getDatasetSequence()),
+           ds.getCodonFrame(aligned2.getDatasetSequence()));
+ }
+
@Test(groups = "Functional")
public void testAddCodonFrame()
{
}
@Test(groups = "Functional")
+ public void testAddSequencePreserveDatasetIntegrity()
+ {
+   Sequence seq = new Sequence("testSeq", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+   Alignment align = new Alignment(new SequenceI[] { seq });
+   align.createDatasetAlignment();
+
+   // adding a gapped copy of seq must not add a second dataset sequence
+   SequenceI copy = new Sequence(seq);
+   copy.insertCharAt(3, 5, '-');
+   align.addSequence(copy);
+   Assert.assertEquals(align.getDataset().getHeight(), 1,
+           "Dataset shouldn't have more than one sequence.");
+
+   // a separately constructed sequence (different name, same residues)
+   // must add one
+   Sequence seq2 = new Sequence("newtestSeq", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+   align.addSequence(seq2);
+   Assert.assertEquals(align.getDataset().getHeight(), 2,
+           "Dataset should now have two sequences.");
+
+   assertAlignmentDatasetRefs(align,
+           "addSequence broke dataset reference integrity");
+ }
+ @Test(groups = "Functional")
public void getVisibleStartAndEndIndexTest()
{
Sequence seq = new Sequence("testSeq", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
assertEquals(1, startEnd[0]);
assertEquals(23, startEnd[1]);
}
+
}
assertFalse(ref1.updateFrom(ref2));
assertEquals("10", ref1.getVersion());
}
+
+ /**
+ * Exercises DBRefEntry.isPrimary() on a single UNIPROT reference whose map
+ * and version are changed step by step. Each assertion depends on the
+ * state left by the preceding steps, so the order of statements matters.
+ */
+ @Test(groups = { "Functional" })
+ public void testIsPrimary()
+ {
+ // no map and an empty version string: expected to be primary
+ DBRefEntry dbr = new DBRefEntry(DBRefSource.UNIPROT, "", "Q12345");
+ assertTrue(dbr.isPrimary());
+ /*
+ * 1:1 mapping
+ */
+ dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 1,
+ 1));
+ assertTrue(dbr.isPrimary());
+ /*
+ * Version string is prefixed with another dbref source string (fail)
+ */
+ dbr.setVersion(DBRefSource.EMBL + ":0");
+ assertFalse(dbr.isPrimary());
+
+ /*
+ * Version string is alphanumeric
+ */
+ dbr.setVersion("0.1.b");
+ assertTrue(dbr.isPrimary());
+
+ /*
+ * 1:1 mapping with shift (fail)
+ */
+ dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 2, 4 }, 1,
+ 1));
+ assertFalse(dbr.isPrimary());
+
+ /*
+ * 1:1 mapping and sequenceRef (fail)
+ */
+ dbr.setMap(new Mapping(new Sequence("foo", "ASDF"), new int[] { 1, 3 },
+ new int[] { 1, 3 }, 1, 1));
+ assertFalse(dbr.isPrimary());
+
+ /*
+ * 1:3 mapping (fail)
+ */
+ dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 1,
+ 3));
+ assertFalse(dbr.isPrimary());
+ /*
+ * 2:2 mapping over identical ranges (expected fail, but maybe use case
+ * for a pass)
+ */
+ dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 2,
+ 2));
+ assertFalse(dbr.isPrimary());
+
+ /*
+ * Version string is prefixed with another dbref source string
+ * NOTE(review): the 2:2 map set above is still in place here, and
+ * isPrimary() was already false with it, so this step does not isolate
+ * the effect of the version string - confirm whether that is intended
+ */
+ dbr.setVersion(DBRefSource.EMBL + ":0");
+ assertFalse(dbr.isPrimary());
+
+ }
}
sq.setDescription("Test sequence description..");
sq.setVamsasId("TestVamsasId");
- sq.setSourceDBRef(new DBRefEntry("PDB", "version0", "1TST"));
+ sq.addDBRef(new DBRefEntry("PDB", "version0", "1TST"));
- sq.addDBRef(new DBRefEntry("PDB", "version1", "1Tst"));
- sq.addDBRef(new DBRefEntry("PDB", "version2", "2Tst"));
- sq.addDBRef(new DBRefEntry("PDB", "version3", "3Tst"));
- sq.addDBRef(new DBRefEntry("PDB", "version4", "4Tst"));
+ sq.addDBRef(new DBRefEntry("PDB", "version1", "1PDB"));
+ sq.addDBRef(new DBRefEntry("PDB", "version2", "2PDB"));
+ sq.addDBRef(new DBRefEntry("PDB", "version3", "3PDB"));
+ sq.addDBRef(new DBRefEntry("PDB", "version4", "4PDB"));
sq.addPDBId(new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"));
sq.addPDBId(new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"));
sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
+
+ DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB");
+ DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version1", "2PDB");
+ //FIXME pdb2pdb's matching PDBEntry has Type.MMCIF - but 2.10 only has PDBEntry with type==PDB to indicate ID is a real PDB entry
+
+ List<DBRefEntry> primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb });
+
+ sq.getDatasetSequence().addDBRef(pdb1pdb);
+ sq.getDatasetSequence().addDBRef(pdb2pdb);
sq.getDatasetSequence().addDBRef(
- new DBRefEntry("PDB", "version1", "1Tst"));
- sq.getDatasetSequence().addDBRef(
- new DBRefEntry("PDB", "version2", "2Tst"));
- sq.getDatasetSequence().addDBRef(
- new DBRefEntry("PDB", "version3", "3Tst"));
+ new DBRefEntry("PDB", "version3", "3PDB"));
sq.getDatasetSequence().addDBRef(
- new DBRefEntry("PDB", "version4", "4Tst"));
-
- sq.getDatasetSequence().addPDBId(
- new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"));
- sq.getDatasetSequence().addPDBId(
- new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"));
+ new DBRefEntry("PDB", "version4", "4PDB"));
+
+ PDBEntry pdbe1a=new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1");
+ PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1");
+ PDBEntry pdbe2a=new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2");
+ PDBEntry pdbe2b = new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2");
sq.getDatasetSequence().addPDBId(
- new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
+ pdbe1a);
sq.getDatasetSequence().addPDBId(
- new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
+ pdbe1b);
+ sq.getDatasetSequence().addPDBId(pdbe2a);
+ sq.getDatasetSequence().addPDBId(pdbe2b);
+
+ /*
+ * test we added pdb entries to the dataset sequence
+ */
+ Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries(), Arrays
+ .asList(new PDBEntry[] { pdbe1a, pdbe1b, pdbe2a, pdbe2b }),
+ "PDB Entries were not found on dataset sequence.");
+ /*
+ * we should recover a pdb entry that is on the dataset sequence via PDBEntry
+ */
+ Assert.assertEquals(pdbe1a,
+ sq.getDatasetSequence().getPDBEntry("1PDB"),
+ "PDB Entry '1PDB' not found on dataset sequence via getPDBEntry.");
ArrayList<Annotation> annotsList = new ArrayList<Annotation>();
System.out.println(">>>>>> " + sq.getSequenceAsString().length());
annotsList.add(new Annotation("A", "A", 'X', 0.1f));
new AlignmentAnnotation("Test annot", "Test annot description",
annots));
Assert.assertEquals(sq.getDescription(), "Test sequence description..");
- Assert.assertEquals(sq.getDBRefs().length, 4);
+ Assert.assertEquals(sq.getDBRefs().length, 5);
Assert.assertEquals(sq.getAllPDBEntries().size(), 4);
Assert.assertNotNull(sq.getAnnotation());
Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2);
Assert.assertEquals(derived.getDescription(),
"Test sequence description..");
- Assert.assertEquals(derived.getDBRefs().length, 4);
+ Assert.assertEquals(derived.getDBRefs().length, 4); // come from dataset
Assert.assertEquals(derived.getAllPDBEntries().size(), 4);
Assert.assertNotNull(derived.getAnnotation());
Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2);
assertNotNull(sq.getSequenceFeatures());
assertArrayEquals(sq.getSequenceFeatures(),
derived.getSequenceFeatures());
+
+ /*
+ * verify we have primary db refs *just* for PDB IDs with associated
+ * PDBEntry objects
+ */
+
+ assertEquals(primRefs, sq.getPrimaryDBRefs());
+ assertEquals(primRefs, sq.getDatasetSequence().getPrimaryDBRefs());
+
+ assertEquals(sq.getPrimaryDBRefs(), derived.getPrimaryDBRefs());
+
}
/**
assertSame(dbref3, sq.getDBRefs()[2]);
assertEquals("3", dbref2.getVersion());
}
+
+ /**
+ * Checks which of a sequence's database references are reported by
+ * getPrimaryDBRefs() when the sequence carries one UNIPROT reference, three
+ * PDB references and two PDBEntry objects.
+ */
+ @Test(groups = { "Functional" })
+ public void testGetPrimaryDBRefs()
+ {
+ /*
+ * test PDB relationships for getPrimaryDBRefs
+ */
+ SequenceI seq = new Sequence("aseq", "ASDF");
+ DBRefEntry upentry = new DBRefEntry("UNIPROT", "0", "1qip");
+ // primary - uniprot
+ seq.addDBRef(upentry);
+ // primary - type is PDB
+ DBRefEntry pdbentry = new DBRefEntry("PDB", "0", "1qip");
+ seq.addDBRef(pdbentry);
+ // not primary - type of PDBEntry is not PDB
+ seq.addDBRef(new DBRefEntry("PDB", "0", "1AAA"));
+ // not primary - no PDBEntry
+ seq.addDBRef(new DBRefEntry("PDB", "0", "1DDD"));
+ // PDBEntry ids: "1QIP" (upper case, Type.PDB) and "1AAA" (null type)
+ seq.addPDBId(new PDBEntry("1QIP", null, Type.PDB, null));
+ seq.addPDBId(new PDBEntry("1AAA", null, null, null));
+ assertTrue("Couldn't find simple primary reference (UNIPROT)", seq
+ .getPrimaryDBRefs().contains(upentry));
+ assertTrue("Couldn't find expected PDB primary reference", seq
+ .getPrimaryDBRefs().contains(pdbentry));
+ // only the two references checked above should be primary
+ assertEquals(2, seq.getPrimaryDBRefs().size());
+ }
}
assertEquals(5, dbrefs.length);
assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
assertEquals("CAA30420.1", dbrefs[0].getAccessionId());
+ // TODO: verify getPrimaryDBRefs() for peptide products
assertEquals(cds1Map.getInverse(), dbrefs[0].getMap().getMap());
assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
assertEquals("CAA30420.1", dbrefs[1].getAccessionId());
package jalview.ext.ensembl;
import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
import jalview.datamodel.DBRefEntry;
@Test(groups = "Functional")
public void testGetCrossReferences()
{
+ String dbName = "ENSEMBL";
+ String dbVers = "0.6.2b1";
System.out.println(JSON);
- EnsemblXref testee = new EnsemblXref("http://rest.ensembl.org")
+ EnsemblXref testee = new EnsemblXref("http://rest.ensembl.org", dbName,
+ dbVers)
{
@Override
protected BufferedReader getHttpResponse(URL url, List<String> ids)
assertEquals(2, dbrefs.size());
assertEquals("CCDS", dbrefs.get(0).getSource());
assertEquals("CCDS5863", dbrefs.get(0).getAccessionId());
+ assertFalse(dbrefs.get(0).isPrimary());
+ assertEquals(dbName + ":" + dbVers, dbrefs.get(0).getVersion());
// Uniprot name should get converted to Jalview canonical form
assertEquals("UNIPROT", dbrefs.get(1).getSource());
assertEquals("P15056", dbrefs.get(1).getAccessionId());
+ assertEquals(dbName + ":" + dbVers, dbrefs.get(1).getVersion());
+ assertFalse(dbrefs.get(1).isPrimary());
}
}
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.io;
+
+import jalview.analysis.CrossRef;
+import jalview.api.AlignmentViewPanel;
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.AlignmentTest;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.gui.CrossRefAction;
+import jalview.gui.Desktop;
+import jalview.gui.Jalview2XML;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+@Test(singleThreaded = true)
+public class CrossRef2xmlTests extends Jalview2xmlBase
+{
+
+ /**
+ * test store and recovery of all reachable cross refs from all reachable
+ * crossrefs for one or more fetched db refs. Currently, this test has a known
+ * failure case.
+ *
+ * Problems met while traversing are collected in three lists (failed
+ * database retrievals, cross-reference actions that produced no views, and
+ * project files that could not be found for recovery) and are reported via
+ * Assert.fail only after all queries have been processed.
+ *
+ * @throws Exception
+ */
+ @Test(groups = { "Operational" }, enabled = true)
+ public void testRetrieveAndShowCrossref() throws Exception
+ {
+
+ List<String> failedDBRetr = new ArrayList<String>();
+ List<String> failedXrefMenuItems = new ArrayList<String>();
+ List<String> failedProjectRecoveries = new ArrayList<String>();
+
+ // for every set of db queries
+ // retrieve db query
+ // verify presence of expected xrefs
+ // show xrefs - verify expected type of frame is shown for each xref
+ // show xrefs again
+ // - verify original -> xref -> xref(original) recovers frame containing at
+ // least the first retrieved sequence
+ // store
+ // 1. whole project
+ // 2. individual frames
+ // 3. load each one back and verify
+ // . aligned sequences (.toString() )
+ // . xrefs (.toString() )
+ // . codonframes
+ //
+ //
+ HashMap<String, String> dbtoviewBit = new HashMap<String, String>();
+ List<String> keyseq = new ArrayList<String>();
+ HashMap<String, File> savedProjects = new HashMap<String, File>();
+
+ for (String[] did : new String[][] { { "ENSEMBL", "ENSG00000157764" },
+ { "UNIPROT", "P01731" } })
+ {
+ // pass counters - 0 - first pass, 1 means retrieve project rather than
+ // perform action
+ int pass1 = 0, pass2 = 0, pass3 = 0;
+ // each do loop performs two iterations in the first outer loop pass, but
+ // only performs one iteration on the second outer loop
+ // ie. pass 1 = 0 {pass 2= 0 { pass 3 = 0,1 }, pass 2=1 { pass 3 = 0 }}, 1
+ // { pass 2 = 0 { pass 3 = 0 } }
+ do
+ {
+ String first = did[0] + " " + did[1];
+ AlignFrame af = null;
+ boolean dna;
+ AlignmentI retral;
+ AlignmentI dataset;
+ SequenceI[] seqs;
+ List<String> ptypes = null;
+ if (pass1 == 0)
+ {
+ // retrieve dbref
+
+ List<AlignFrame> afs = jalview.gui.SequenceFetcher.fetchAndShow(
+ did[0], did[1]);
+ if (afs.size() == 0)
+ {
+ failedDBRetr.add("Didn't retrieve " + first);
+ break;
+ }
+ keyseq.add(first);
+ af = afs.get(0);
+
+ // verify references for retrieved data
+ AlignmentTest.assertAlignmentDatasetRefs(af.getViewport()
+ .getAlignment(), "Pass (" + pass1 + "," + pass2 + ","
+ + pass3 + "): Fetch " + first + ":");
+ assertDatasetIsNormalisedKnownDefect(af.getViewport()
+ .getAlignment(), "Pass (" + pass1 + "," + pass2 + ","
+ + pass3 + "): Fetch " + first + ":");
+ dna = af.getViewport().getAlignment().isNucleotide();
+ retral = af.getViewport().getAlignment();
+ dataset = retral.getDataset();
+ seqs = retral.getSequencesArray();
+
+ }
+ else
+ {
+ Desktop.instance.closeAll_actionPerformed(null);
+ // recover stored project
+ af = new FileLoader(false).LoadFileWaitTillLoaded(savedProjects
+ .get(first).toString(), FormatAdapter.FILE);
+ System.out.println("Recovered view for '" + first + "' from '"
+ + savedProjects.get(first).toString() + "'");
+ dna = af.getViewport().getAlignment().isNucleotide();
+ retral = af.getViewport().getAlignment();
+ dataset = retral.getDataset();
+ seqs = retral.getSequencesArray();
+
+ // verify references for recovered data
+ AlignmentTest.assertAlignmentDatasetRefs(af.getViewport()
+ .getAlignment(), "Pass (" + pass1 + "," + pass2 + ","
+ + pass3 + "): Recover " + first + ":");
+ assertDatasetIsNormalisedKnownDefect(af.getViewport()
+ .getAlignment(), "Pass (" + pass1 + "," + pass2 + ","
+ + pass3 + "): Recover " + first + ":");
+
+ }
+
+ // store project on first pass, compare next pass
+ stringify(dbtoviewBit, savedProjects, first, af.alignPanel);
+
+ ptypes = (seqs == null || seqs.length == 0) ? null : new CrossRef(
+ seqs, dataset).findXrefSourcesForSequences(dna);
+
+ // start of pass2: retrieve each cross-ref for fetched or restored
+ // project.
+ do // first cross ref and recover crossref loop
+ {
+
+ for (String db : ptypes)
+ {
+ // counter for splitframe views retrieved via crossref
+ int firstcr_ap = 0;
+ // build next key so we can retrieve all views
+ String nextxref = first + " -> " + db + "{" + firstcr_ap + "}";
+ // perform crossref action, or retrieve stored project
+ List<AlignmentViewPanel> cra_views = new ArrayList<AlignmentViewPanel>();
+ CrossRefAction cra = null;
+
+ if (pass2 == 0)
+ { // retrieve and show cross-refs in this thread
+ cra = new CrossRefAction(af, seqs, dna, db);
+ cra.run();
+ if (cra.getXrefViews().size() == 0)
+ {
+ failedXrefMenuItems.add("No crossrefs retrieved for "
+ + first + " -> " + db);
+ continue;
+ }
+ cra_views = cra.getXrefViews();
+ assertNucleotide(cra_views.get(0),
+ "Nucleotide panel included proteins for " + first
+ + " -> " + db);
+ assertProtein(cra_views.get(1),
+ "Protein panel included nucleotides for " + first
+ + " -> " + db);
+ }
+ else
+ {
+ Desktop.instance.closeAll_actionPerformed(null);
+ pass3 = 0;
+ // recover stored project
+ File storedProject = savedProjects.get(nextxref);
+ if (storedProject == null)
+ {
+ failedProjectRecoveries.add("Failed to store a view for '"
+ + nextxref + "'");
+ continue;
+ }
+
+ // recover stored project
+ AlignFrame af2 = new FileLoader(false)
+ .LoadFileWaitTillLoaded(savedProjects.get(nextxref)
+ .toString(), FormatAdapter.FILE);
+ System.out.println("Recovered view for '" + nextxref
+ + "' from '" + savedProjects.get(nextxref).toString()
+ + "'");
+ // gymnastics to recover the alignPanel/Complementary alignPanel
+ if (af2.getViewport().isNucleotide())
+ {
+ // top view, then bottom
+ cra_views.add(af2.getViewport().getAlignPanel());
+ cra_views.add(((jalview.gui.AlignViewport) af2
+ .getViewport().getCodingComplement())
+ .getAlignPanel());
+
+ }
+ else
+ {
+ // bottom view, then top
+ cra_views.add(((jalview.gui.AlignViewport) af2
+ .getViewport().getCodingComplement())
+ .getAlignPanel());
+ cra_views.add(af2.getViewport().getAlignPanel());
+
+ }
+ }
+ HashMap<String, List<String>> xrptypes = new HashMap<String, List<String>>();
+ // first save/verify views.
+ for (AlignmentViewPanel avp : cra_views)
+ {
+ nextxref = first + " -> " + db + "{" + firstcr_ap++ + "}";
+ // verify references for this panel
+ AlignmentTest.assertAlignmentDatasetRefs(avp.getAlignment(),
+ "Pass (" + pass1 + "," + pass2 + "," + pass3
+ + "): before start of pass3: " + nextxref
+ + ":");
+ assertDatasetIsNormalisedKnownDefect(avp.getAlignment(),
+ "Pass (" + pass1 + "," + pass2 + "," + pass3
+ + "): before start of pass3: " + nextxref
+ + ":");
+
+ SequenceI[] xrseqs = avp.getAlignment().getSequencesArray();
+
+ // NOTE(review): the guard below tests 'seqs' (the originally
+ // fetched sequences) although the CrossRef is built from
+ // 'xrseqs' - confirm which one was intended
+ List<String> _xrptypes = (seqs == null || seqs.length == 0) ? null
+ : new CrossRef(xrseqs, dataset)
+ .findXrefSourcesForSequences(avp
+ .getAlignViewport().isNucleotide());
+
+ stringify(dbtoviewBit, savedProjects, nextxref, avp);
+ xrptypes.put(nextxref, _xrptypes);
+
+ }
+
+ // now do the second xref pass starting from either saved or just
+ // recovered split pane, in sequence
+ do // retrieve second set of cross refs or recover and verify
+ {
+ firstcr_ap = 0;
+ for (AlignmentViewPanel avp : cra_views)
+ {
+ nextxref = first + " -> " + db + "{" + firstcr_ap++ + "}";
+ for (String xrefdb : xrptypes.get(nextxref))
+ {
+ List<AlignmentViewPanel> cra_views2 = new ArrayList<AlignmentViewPanel>();
+ int q = 0;
+ String nextnextxref = nextxref
+ + " -> " + xrefdb + "{" + q + "}";
+
+ if (pass3 == 0)
+ {
+
+ SequenceI[] xrseqs = avp.getAlignment()
+ .getSequencesArray();
+ AlignFrame nextaf = Desktop.getAlignFrameFor(avp
+ .getAlignViewport());
+
+ cra = new CrossRefAction(nextaf, xrseqs, avp
+ .getAlignViewport().isNucleotide(), xrefdb);
+ cra.run();
+ if (cra.getXrefViews().size() == 0)
+ {
+ failedXrefMenuItems
+ .add("No crossrefs retrieved for '"
+ + nextxref + "' to " + xrefdb + " via '"
+ + nextaf.getTitle() + "'");
+ continue;
+ }
+ cra_views2 = cra.getXrefViews();
+ assertNucleotide(cra_views2.get(0),
+ "Nucleotide panel included proteins for '"
+ + nextxref + "' to " + xrefdb
+ + " via '" + nextaf.getTitle() + "'");
+ assertProtein(cra_views2.get(1),
+ "Protein panel included nucleotides for '"
+ + nextxref + "' to " + xrefdb
+ + " via '" + nextaf.getTitle() + "'");
+
+ }
+ else
+ {
+ Desktop.instance.closeAll_actionPerformed(null);
+ // recover stored project
+ File storedProject = savedProjects.get(nextnextxref);
+ if (storedProject == null)
+ {
+ failedProjectRecoveries
+ .add("Failed to store a view for '"
+ + nextnextxref + "'");
+ continue;
+ }
+ AlignFrame af2 = new FileLoader(false)
+ .LoadFileWaitTillLoaded(
+ savedProjects.get(nextnextxref)
+ .toString(), FormatAdapter.FILE);
+ System.out.println("Recovered view for '"
+ + nextnextxref + "' from '"
+ + savedProjects.get(nextnextxref).toString()
+ + "'");
+ // gymnastics to recover the alignPanel/Complementary
+ // alignPanel
+ if (af2.getViewport().isNucleotide())
+ {
+ // top view, then bottom
+ cra_views2.add(af2.getViewport().getAlignPanel());
+ cra_views2.add(((jalview.gui.AlignViewport) af2
+ .getViewport().getCodingComplement())
+ .getAlignPanel());
+
+ }
+ else
+ {
+ // bottom view, then top
+ cra_views2.add(((jalview.gui.AlignViewport) af2
+ .getViewport().getCodingComplement())
+ .getAlignPanel());
+ cra_views2.add(af2.getViewport().getAlignPanel());
+ }
+ Assert.assertEquals(cra_views2.size(), 2);
+ Assert.assertNotNull(cra_views2.get(0));
+ Assert.assertNotNull(cra_views2.get(1));
+ }
+
+ for (AlignmentViewPanel nextavp : cra_views2)
+ {
+ nextnextxref = nextxref
+ + " -> " + xrefdb + "{" + q++ + "}";
+
+ // verify references for this panel
+ AlignmentTest.assertAlignmentDatasetRefs(
+ nextavp.getAlignment(), "" + "Pass (" + pass1
+ + "," + pass2 + "): For "
+ + nextnextxref + ":");
+ assertDatasetIsNormalisedKnownDefect(
+ nextavp.getAlignment(), "" + "Pass (" + pass1
+ + "," + pass2 + "): For "
+ + nextnextxref + ":");
+
+ stringify(dbtoviewBit, savedProjects, nextnextxref,
+ nextavp);
+ keyseq.add(nextnextxref);
+ }
+ } // end of loop around showing all xrefdb for crossrf2
+
+ } // end of loop around all viewpanels from crossrf1
+ } while (pass2 == 2 && pass3++ < 2);
+ // fetchdb->crossref1->crossref-2->verify for xrefs we
+ // either loop twice when pass2=0, or just once when pass2=1
+ // (recovered project from previous crossref)
+
+ } // end of loop over db-xrefs for crossref-2
+
+ // fetchdb-->crossref1
+ // for each xref we try to retrieve xref, store and verify when
+ // pass1=0, or just retrieve and verify when pass1=1
+ } while (pass1 == 1 && pass2++ < 2);
+ // fetchdb
+ // for each ref we
+ // loop twice: first, do the retrieve, second recover from saved project
+
+ // increment pass counters, so we repeat traversal starting from the
+ // oldest saved project first.
+ if (pass1 == 0)
+ {
+ // verify stored projects for first set of cross references
+ pass1 = 1;
+ // and verify cross-references retrieved from stored projects
+ pass2 = 0;
+ pass3 = 0;
+ }
+ else
+ {
+ pass1++;
+ }
+ } while (pass1 < 3);
+ }
+ // report collected problems; Assert.fail throws, so only the first
+ // non-empty category below is reported in any one run
+ if (failedXrefMenuItems.size() > 0)
+ {
+ for (String s : failedXrefMenuItems)
+ {
+ System.err.println(s);
+ }
+ Assert.fail("Faulty xref menu (" + failedXrefMenuItems.size()
+ + " counts)");
+ }
+ if (failedProjectRecoveries.size() > 0)
+ {
+
+ for (String s : failedProjectRecoveries)
+ {
+ System.err.println(s);
+ }
+ Assert.fail("Didn't recover projects for some retrievals (did they retrieve ?) ("
+ + failedProjectRecoveries.size() + " counts)");
+ }
+ if (failedDBRetr.size() > 0)
+ {
+ // print the failed retrievals themselves, not the project recovery
+ // list (which is known to be empty if this point is reached)
+ for (String s : failedDBRetr)
+ {
+ System.err.println(s);
+ }
+ Assert.fail("Didn't retrieve some db refs for checking cross-refs ("
+ + failedDBRetr.size() + " counts)");
+ }
+ }
+
+ /**
+ * wrapper to trap known defect for AH002001 testcase: calls
+ * AlignmentTest.assertDatasetIsNormalised and ignores (after logging to
+ * stdout) an AssertionError whose message ends with "EMBL|AH002001". Any
+ * other AssertionError, including one with no message, is rethrown.
+ *
+ * @param al
+ * alignment passed to the normalisation check
+ * @param message
+ * context passed to the check and echoed when a failure is ignored
+ */
+ private void assertDatasetIsNormalisedKnownDefect(AlignmentI al,
+ String message)
+ {
+ try
+ {
+ AlignmentTest.assertDatasetIsNormalised(al, message);
+ } catch (AssertionError ae)
+ {
+ // getMessage() is null for an AssertionError raised without detail;
+ // rethrow it rather than hiding the failure behind an NPE
+ String detail = ae.getMessage();
+ if (detail == null || !detail.endsWith("EMBL|AH002001"))
+ {
+ throw ae;
+ }
+ else
+ {
+ System.out
+ .println("Ignored exception for known defect: JAL-2179 : "
+ + message);
+ }
+
+ }
+ }
+
+ /**
+ * Fails (via assertType) with the given message if any sequence in the
+ * panel's alignment is not protein.
+ *
+ * @param alignmentViewPanel
+ * panel whose alignment is checked
+ * @param message
+ * prefix for the failure message
+ */
+ private void assertProtein(AlignmentViewPanel alignmentViewPanel,
+ String message)
+ {
+ assertType(true, alignmentViewPanel, message);
+ }
+
+ /**
+ * Fails (via assertType) with the given message if any sequence in the
+ * panel's alignment reports itself as protein.
+ *
+ * @param alignmentViewPanel
+ * panel whose alignment is checked
+ * @param message
+ * prefix for the failure message
+ */
+ private void assertNucleotide(AlignmentViewPanel alignmentViewPanel,
+ String message)
+ {
+ assertType(false, alignmentViewPanel, message);
+ }
+
+ /**
+ * Checks that every sequence in the panel's alignment has the expected
+ * molecule type, and fails listing the offending sequences otherwise.
+ *
+ * @param expectProtein
+ * true if all sequences should be protein, false if none should be
+ * @param alignmentViewPanel
+ * panel whose alignment is checked
+ * @param message
+ * prefix for the failure message
+ */
+ private void assertType(boolean expectProtein,
+ AlignmentViewPanel alignmentViewPanel, String message)
+ {
+ List<SequenceI> mismatched = new ArrayList<SequenceI>();
+ for (SequenceI candidate : alignmentViewPanel.getAlignViewport()
+ .getAlignment().getSequences())
+ {
+ if (candidate.isProtein() == expectProtein)
+ {
+ // type is as expected - nothing to record
+ continue;
+ }
+ mismatched.add(candidate);
+ }
+ if (mismatched.size() == 0)
+ {
+ return;
+ }
+ String offenders = expectProtein ? "nucleotides were "
+ : "proteins were ";
+ Assert.fail(message + " [ " + offenders + mismatched.toString()
+ + " ]");
+ }
+
+ /**
+ * first time called, record strings derived from alignment and
+ * alignedcodonframes, and save view to a project file. Second time called,
+ * compare strings to existing ones. org.testng.Assert.assertEquals on
+ * stringmatch
+ *
+ * @param dbtoviewBit
+ * map between xrefpath and view string
+ * @param savedProjects
+ * - map from xrefpath to saved project filename (createTempFile)
+ * @param xrefpath
+ * - xrefpath - unique ID for this context (composed of sequence of
+ * db-fetch/cross-ref actions preceding state)
+ * @param avp
+ * - viewpanel to store (for viewpanels in splitframe, the same
+ * project should be written for both panels, only one needs
+ * recovering for comparison on the next stringify call, but each
+ * viewpanel needs to be called with a distinct xrefpath to ensure
+ * each one's strings are compared)
+ */
+ private void stringify(HashMap<String, String> dbtoviewBit,
+ HashMap<String, File> savedProjects, String xrefpath,
+ AlignmentViewPanel avp)
+ {
+ // a null savedProjects map skips the project file handling altogether
+ if (savedProjects != null)
+ {
+ if (savedProjects.get(xrefpath) == null)
+ {
+ // write a project file for this view. On the second pass, this will be
+ // recovered and cross-references verified
+ try
+ {
+ File prfile = File.createTempFile("crossRefTest", ".jvp");
+ AlignFrame af = Desktop.getAlignFrameFor(avp.getAlignViewport());
+ new Jalview2XML(false).saveAlignment(af, prfile.toString(),
+ af.getTitle());
+ System.out.println("Written view from '" + xrefpath + "' as '"
+ + prfile.getAbsolutePath() + "'");
+ savedProjects.put(xrefpath, prfile);
+ } catch (IOException q)
+ {
+ Assert.fail("Unexpected IO Exception", q);
+ }
+ }
+ else
+ {
+ System.out.println("Stringify check on view from '" + xrefpath
+ + "' [ possibly retrieved from '"
+ + savedProjects.get(xrefpath).getAbsolutePath() + "' ]");
+
+ }
+ }
+
+ // build the view string: alignment, its dataset, then each codon frame
+ StringBuilder sbr = new StringBuilder();
+ sbr.append(avp.getAlignment().toString());
+ sbr.append("\n");
+ sbr.append("<End of alignment>");
+ sbr.append("\n");
+ sbr.append(avp.getAlignment().getDataset());
+ sbr.append("\n");
+ sbr.append("<End of dataset>");
+ sbr.append("\n");
+ int p = 0;
+ if (avp.getAlignment().getCodonFrames() != null)
+ {
+ for (AlignedCodonFrame ac : avp.getAlignment().getCodonFrames())
+ {
+ sbr.append("<AlignedCodonFrame " + p++ + ">");
+ sbr.append("\n");
+ sbr.append(ac.toString());
+ sbr.append("\n");
+ }
+ }
+ // first call for this xrefpath records the string, later calls compare
+ String dbt = dbtoviewBit.get(xrefpath);
+ if (dbt == null)
+ {
+ dbtoviewBit.put(xrefpath, sbr.toString());
+ }
+ else
+ {
+ Assert.assertEquals(sbr.toString(), dbt, "stringify mismatch for "
+ + xrefpath);
+ }
+ }
+}
--- /dev/null
+package jalview.io;
+
+import jalview.bin.Cache;
+import jalview.bin.Jalview;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.SequenceI;
+import jalview.gui.Desktop;
+
+import java.util.Date;
+
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeTest;
+
+public class Jalview2xmlBase
+{
+
+ /**
+ * Class-level set-up: loads the test properties file, sets the news feed
+ * 'last modified' property to one hour in the future, then calls
+ * Jalview.main with no arguments.
+ *
+ * @throws java.lang.Exception
+ */
+ @BeforeClass(alwaysRun = true)
+ public static void setUpBeforeClass() throws Exception
+ {
+ /*
+ * use read-only test properties file
+ */
+ Cache.loadProperties("test/jalview/io/testProps.jvprops");
+
+ /*
+ * set news feed last read to a future time to ensure no
+ * 'unread' news item is displayed
+ */
+ Date oneHourFromNow = new Date(System.currentTimeMillis() + 3600 * 1000);
+ Cache.setDateProperty("JALVIEW_NEWS_RSS_LASTMODIFIED", oneHourFromNow);
+
+ Jalview.main(new String[] {});
+ }
+
+ /**
+ * Class-level tear-down: closes all desktop windows. Because this method
+ * is marked alwaysRun it may be invoked even when set-up did not complete,
+ * so the desktop instance is checked for null first (as clearDesktop does).
+ *
+ * @throws java.lang.Exception
+ */
+ @AfterClass(alwaysRun = true)
+ public static void tearDownAfterClass() throws Exception
+ {
+ if (Desktop.instance != null)
+ {
+ Desktop.instance.closeAll_actionPerformed(null);
+ }
+ }
+
+ /**
+ * Closes all desktop windows, provided a desktop instance exists and
+ * Desktop.getAlignFrames() is not null.
+ *
+ * NOTE(review): TestNG runs @BeforeTest methods once per &lt;test&gt; element
+ * of the suite, not before each test method - confirm that @BeforeMethod
+ * was not intended here.
+ */
+ @BeforeTest(alwaysRun = true)
+ public static void clearDesktop()
+ {
+ if (Desktop.instance != null && Desktop.getAlignFrames() != null)
+ {
+ Desktop.instance.closeAll_actionPerformed(null);
+ }
+ }
+
+ /**
+ * Counts the annotation rows, over all sequences in the dataset of the
+ * viewport's alignment, for which isValidStruc() returns true.
+ *
+ * @param avp
+ * viewport whose alignment dataset is inspected
+ * @return number of dataset sequence annotations with a valid structure
+ */
+ public int countDsAnn(jalview.viewmodel.AlignmentViewport avp)
+ {
+ int validStrucCount = 0;
+ for (SequenceI dsSeq : avp.getAlignment().getDataset().getSequences())
+ {
+ AlignmentAnnotation[] rows = dsSeq.getAnnotation();
+ if (rows == null)
+ {
+ // sequence carries no annotation at all
+ continue;
+ }
+ for (AlignmentAnnotation row : rows)
+ {
+ validStrucCount += row.isValidStruc() ? 1 : 0;
+ }
+ }
+ return validStrucCount;
+ }
+
+}
import jalview.api.AlignViewportI;
import jalview.api.AlignmentViewPanel;
import jalview.api.ViewStyleI;
-import jalview.bin.Cache;
-import jalview.bin.Jalview;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.HiddenSequences;
import java.io.File;
import java.util.ArrayList;
-import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.testng.Assert;
import org.testng.AssertJUnit;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@Test(singleThreaded = true)
-public class Jalview2xmlTests
+public class Jalview2xmlTests extends Jalview2xmlBase
{
- /**
- * @throws java.lang.Exception
- */
- @BeforeClass(alwaysRun = true)
- public static void setUpBeforeClass() throws Exception
- {
- /*
- * use read-only test properties file
- */
- Cache.loadProperties("test/jalview/io/testProps.jvprops");
-
- /*
- * set news feed last read to a future time to ensure no
- * 'unread' news item is displayed
- */
- Date oneHourFromNow = new Date(System.currentTimeMillis() + 3600 * 1000);
- Cache.setDateProperty("JALVIEW_NEWS_RSS_LASTMODIFIED", oneHourFromNow);
-
- Jalview.main(new String[] {});
- }
-
- /**
- * @throws java.lang.Exception
- */
- @AfterClass(alwaysRun = true)
- public static void tearDownAfterClass() throws Exception
- {
- Desktop.instance.closeAll_actionPerformed(null);
- }
-
- int countDsAnn(jalview.viewmodel.AlignmentViewport avp)
- {
- int numdsann = 0;
- for (SequenceI sq : avp.getAlignment().getDataset().getSequences())
- {
- if (sq.getAnnotation() != null)
- {
- for (AlignmentAnnotation dssa : sq.getAnnotation())
- {
- if (dssa.isValidStruc())
- {
- numdsann++;
- }
- }
- }
- }
- return numdsann;
- }
-
@Test(groups = { "Functional" })
public void testRNAStructureRecovery() throws Exception
{
--- /dev/null
+#---JalviewX Properties File---
+#Fri Apr 25 09:54:25 BST 2014
+SCREEN_Y=768
+SCREEN_X=936
+SHOW_WSDISCOVERY_ERRORS=true
+LATEST_VERSION=2.8.0b1
+SHOW_CONSERVATION=true
+JALVIEW_RSS_WINDOW_SCREEN_WIDTH=550
+JAVA_CONSOLE_SCREEN_WIDTH=450
+LAST_DIRECTORY=/Volumes/Data/Users/jimp/Documents/testing/Jalview/examples
+ID_ITALICS=true
+SORT_ALIGNMENT=No sort
+SHOW_IDENTITY=true
+WSMENU_BYHOST=false
+SEQUENCE_LINKS=EMBL-EBI Search|http\://www.ebi.ac.uk/ebisearch/search.ebi?db\=allebi&query\=$SEQUENCE_ID$
+SHOW_FULLSCREEN=false
+RECENT_URL=http\://www.jalview.org/examples/exampleFile_2_7.jar
+FONT_NAME=SansSerif
+BLC_JVSUFFIX=true
+VERSION_CHECK=false
+YEAR=2011
+SHOW_DBREFS_TOOLTIP=true
+MSF_JVSUFFIX=true
+SCREENGEOMETRY_HEIGHT=1600
+JAVA_CONSOLE_SCREEN_Y=475
+JAVA_CONSOLE_SCREEN_X=830
+PFAM_JVSUFFIX=true
+PIR_JVSUFFIX=true
+STARTUP_FILE=http\://www.jalview.org/examples/exampleFile_2_3.jar
+JAVA_CONSOLE_SCREEN_HEIGHT=162
+PIR_MODELLER=false
+GAP_SYMBOL=-
+SHOW_QUALITY=true
+SHOW_GROUP_CONSERVATION=false
+SHOW_JWS2_SERVICES=true
+SHOW_NPFEATS_TOOLTIP=true
+FONT_STYLE=plain
+ANTI_ALIAS=false
+SORT_BY_TREE=false
+RSBS_SERVICES=|Multi-Harmony|Analysis|Sequence Harmony and Multi-Relief (Brandt et al. 2010)|hseparable,gapCharacter\='-',returns\='ANNOTATION'|?tool\=jalview|http\://zeus.few.vu.nl/programs/shmrwww/index.php?tool\=jalview&groups\=$PARTITION\:min\='2',minsize\='2',sep\=' '$&ali_file\=$ALIGNMENT\:format\='FASTA',writeasfile$
+AUTHORFNAMES=Jim Procter, Andrew Waterhouse, Jan Engelhardt, Lauren Lui, Michele Clamp, James Cuff, Steve Searle, David Martin & Geoff Barton
+JALVIEW_RSS_WINDOW_SCREEN_HEIGHT=328
+SHOW_GROUP_CONSENSUS=false
+SHOW_CONSENSUS_HISTOGRAM=true
+SHOW_OVERVIEW=false
+AUTHORS=J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
+FIGURE_AUTOIDWIDTH=false
+SCREEN_WIDTH=900
+ANNOTATIONCOLOUR_MIN=ffc800
+SHOW_STARTUP_FILE=false
+RECENT_FILE=examples/uniref50.fa\t/Volumes/Data/Users/jimp/Documents/testing/Jalview/examples/RF00031_folded.stk\t/Volumes/Data/Users/jimp/bs_ig_mult.out
+DEFAULT_FILE_FORMAT=FASTA
+SHOW_JAVA_CONSOLE=false
+VERSION=2.8b1
+FIGURE_USERIDWIDTH=
+WSMENU_BYTYPE=false
+DEFAULT_COLOUR=None
+NOQUESTIONNAIRES=true
+JALVIEW_NEWS_RSS_LASTMODIFIED=Apr 23, 2014 2\:53\:26 PM
+BUILD_DATE=01 November 2013
+PILEUP_JVSUFFIX=true
+SHOW_CONSENSUS_LOGO=false
+SCREENGEOMETRY_WIDTH=2560
+SHOW_ANNOTATIONS=true
+JALVIEW_RSS_WINDOW_SCREEN_Y=0
+USAGESTATS=false
+JALVIEW_RSS_WINDOW_SCREEN_X=0
+SHOW_UNCONSERVED=false
+SHOW_JVSUFFIX=true
+SCREEN_HEIGHT=650
+ANNOTATIONCOLOUR_MAX=ff0000
+AUTO_CALC_CONSENSUS=true
+FASTA_JVSUFFIX=true
+DAS_ACTIVE_SOURCE=
+JWS2HOSTURLS=http\://www.compbio.dundee.ac.uk/jabaws
+PAD_GAPS=false
+CLUSTAL_JVSUFFIX=true
+SHOW_ENFIN_SERVICES=true
+FONT_SIZE=10
+RIGHT_ALIGN_IDS=false
+USE_PROXY=false
+WRAP_ALIGNMENT=false
+DAS_REGISTRY_URL=http\://www.nowhere/
import jalview.ws.dbsources.Uniprot;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import org.testng.annotations.AfterClass;
@Test(groups = { "Functional" })
public void testStandardProtDbs()
{
- String[] defdb = DBRefSource.PROTEINDBS;
+ List<String> defdb = new ArrayList<String>();
+ defdb.addAll(Arrays.asList(DBRefSource.PROTEINDBS));
+ defdb.add(DBRefSource.PDB);
List<DbSourceProxy> srces = new ArrayList<DbSourceProxy>();
SequenceFetcher sfetcher = new SequenceFetcher();
boolean pdbFound = false;