From d62b90cb6effb7b380e5f7d590691dd884b024cf Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 12 May 2015 16:35:32 +0100 Subject: [PATCH 1/1] JAL-1712 fixes/tests for Castor binding and 'show flanking regions' --- resources/embl_mapping.xml | 21 +-- src/jalview/analysis/AlignmentUtils.java | 29 ++-- src/jalview/datamodel/AlignmentAnnotation.java | 23 ++- src/jalview/datamodel/PDBEntry.java | 4 + src/jalview/datamodel/Sequence.java | 16 +- src/jalview/datamodel/SequenceI.java | 7 +- src/jalview/datamodel/UniprotEntry.java | 31 ++-- src/jalview/datamodel/UniprotFile.java | 12 +- src/jalview/datamodel/UniprotProteinName.java | 19 ++- src/jalview/datamodel/UniprotSequence.java | 26 +++- src/jalview/datamodel/xdb/embl/BasePosition.java | 6 + src/jalview/datamodel/xdb/embl/EmblEntry.java | 57 +++---- src/jalview/datamodel/xdb/embl/EmblError.java | 5 + src/jalview/datamodel/xdb/embl/EmblFeature.java | 25 ++-- .../datamodel/xdb/embl/EmblFeatureLocElement.java | 6 + .../datamodel/xdb/embl/EmblFeatureLocations.java | 27 ++-- src/jalview/datamodel/xdb/embl/EmblFile.java | 53 +++---- src/jalview/datamodel/xdb/embl/EmblSequence.java | 5 + src/jalview/datamodel/xdb/embl/Qualifier.java | 8 + src/jalview/ws/dbsources/EmblXmlSource.java | 29 ++-- src/jalview/ws/dbsources/Uniprot.java | 158 ++++++++++++-------- src/jalview/ws/ebi/EBIFetchClient.java | 24 ++- test/jalview/analysis/AlignmentUtilsTests.java | 141 +++++++++++++++-- .../datamodel/AlignmentAnnotationTests.java | 49 +++++- test/jalview/datamodel/SequenceTest.java | 86 +++++++++++ test/jalview/datamodel/xdb/embl/EmblFileTest.java | 127 ++++++++++++++++ test/jalview/ws/dbsources/UniprotTest.java | 124 +++++++++++++++ 27 files changed, 860 insertions(+), 258 deletions(-) create mode 100644 test/jalview/datamodel/xdb/embl/EmblFileTest.java create mode 100644 test/jalview/ws/dbsources/UniprotTest.java diff --git a/resources/embl_mapping.xml b/resources/embl_mapping.xml index 7199354..ccbde5e 100644 --- 
a/resources/embl_mapping.xml +++ b/resources/embl_mapping.xml @@ -34,17 +34,16 @@ - + - + - @@ -54,7 +53,7 @@ - + @@ -64,11 +63,10 @@ - + - @@ -80,7 +78,6 @@ - @@ -95,18 +92,17 @@ - + - + - + - @@ -118,7 +114,6 @@ - @@ -130,7 +125,6 @@ - @@ -148,7 +142,6 @@ - diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index a4aeac7..df30234 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -77,18 +77,22 @@ public class AlignmentUtils for (SequenceI s : core.getSequences()) { SequenceI newSeq = s.deriveSequence(); - if (newSeq.getStart() > maxoffset + final int newSeqStart = newSeq.getStart() - 1; + if (newSeqStart > maxoffset && newSeq.getDatasetSequence().getStart() < s.getStart()) { - maxoffset = newSeq.getStart(); + maxoffset = newSeqStart; } sq.add(newSeq); } if (flankSize > -1) { - maxoffset = flankSize; + maxoffset = Math.min(maxoffset, flankSize); } - // now add offset to create a new expanded alignment + + /* + * now add offset left and right to create an expanded alignment + */ for (SequenceI s : sq) { SequenceI ds = s; @@ -98,8 +102,8 @@ public class AlignmentUtils } int s_end = s.findPosition(s.getStart() + s.getLength()); // find available flanking residues for sequence - int ustream_ds = s.getStart() - ds.getStart(), dstream_ds = ds - .getEnd() - s_end; + int ustream_ds = s.getStart() - ds.getStart(); + int dstream_ds = ds.getEnd() - s_end; // build new flanked sequence @@ -115,27 +119,27 @@ public class AlignmentUtils offset = maxoffset - flankSize; ustream_ds = flankSize; } - if (flankSize < dstream_ds) + if (flankSize <= dstream_ds) { - dstream_ds = flankSize; + dstream_ds = flankSize - 1; } } + // TODO use Character.toLowerCase to avoid creating String objects? 
char[] upstream = new String(ds.getSequence(s.getStart() - 1 - ustream_ds, s.getStart() - 1)).toLowerCase().toCharArray(); - char[] downstream = new String(ds.getSequence(s_end - 1, s_end + 1 + char[] downstream = new String(ds.getSequence(s_end - 1, s_end + dstream_ds)).toLowerCase().toCharArray(); char[] coreseq = s.getSequence(); char[] nseq = new char[offset + upstream.length + downstream.length + coreseq.length]; char c = core.getGapCharacter(); - // TODO could lowercase the flanking regions + int p = 0; for (; p < offset; p++) { nseq[p] = c; } - // s.setSequence(new String(upstream).toLowerCase()+new String(coreseq) + - // new String(downstream).toLowerCase()); + System.arraycopy(upstream, 0, nseq, p, upstream.length); System.arraycopy(coreseq, 0, nseq, p + upstream.length, coreseq.length); @@ -153,6 +157,7 @@ public class AlignmentUtils { for (AlignmentAnnotation aa : s.getAnnotation()) { + aa.adjustForAlignment(); // JAL-1712 fix newAl.addAnnotation(aa); } } diff --git a/src/jalview/datamodel/AlignmentAnnotation.java b/src/jalview/datamodel/AlignmentAnnotation.java index 1bbe81e..b608139 100755 --- a/src/jalview/datamodel/AlignmentAnnotation.java +++ b/src/jalview/datamodel/AlignmentAnnotation.java @@ -20,10 +20,6 @@ */ package jalview.datamodel; -import jalview.analysis.Rna; -import jalview.analysis.SecStrConsensus.SimpleBP; -import jalview.analysis.WUSSParseException; - import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -32,6 +28,10 @@ import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; +import jalview.analysis.Rna; +import jalview.analysis.SecStrConsensus.SimpleBP; +import jalview.analysis.WUSSParseException; + /** * DOCUMENT ME! 
* @@ -1359,8 +1359,21 @@ public class AlignmentAnnotation { if (properties == null) { - return Collections.EMPTY_LIST; + return Collections.emptyList(); } return properties.keySet(); } + + /** + * Returns the Annotation for the given sequence position (base 1) if any, + * else null + * + * @param position + * @return + */ + public Annotation getAnnotationForPosition(int position) + { + return sequenceMapping == null ? null : sequenceMapping.get(position); + + } } diff --git a/src/jalview/datamodel/PDBEntry.java b/src/jalview/datamodel/PDBEntry.java index 0050666..eb955d6 100755 --- a/src/jalview/datamodel/PDBEntry.java +++ b/src/jalview/datamodel/PDBEntry.java @@ -125,6 +125,10 @@ public class PDBEntry return file; } + public void setType(String t) + { + this.type = t; + } public void setType(PDBEntry.Type type) { this.type = type.toString(); diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index 9cec370..cab1ac7 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -20,9 +20,6 @@ */ package jalview.datamodel; -import jalview.analysis.AlignSeq; -import jalview.util.StringUtils; - import java.util.ArrayList; import java.util.Enumeration; import java.util.List; @@ -30,6 +27,9 @@ import java.util.Vector; import fr.orsay.lri.varna.models.rna.RNA; +import jalview.analysis.AlignSeq; +import jalview.util.StringUtils; + /** * * Implements the SequenceI interface for a char[] based sequence object. @@ -121,7 +121,7 @@ public class Sequence implements SequenceI .println("POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor."); name = ""; } - // Does sequence have the /start-end signiature? + // Does sequence have the /start-end signature? if (limitrx.search(name)) { name = limitrx.left(); @@ -379,7 +379,8 @@ public class Sequence implements SequenceI * @param id * DOCUMENT ME! 
*/ - public void setPDBId(Vector id) + @Override + public void setPDBId(Vector id) { pdbIds = id; } @@ -389,7 +390,8 @@ public class Sequence implements SequenceI * * @return DOCUMENT ME! */ - public Vector getPDBId() + @Override + public Vector getPDBId() { return pdbIds; } @@ -947,7 +949,7 @@ public class Sequence implements SequenceI { if (this.annotation == null) { - this.annotation = new Vector(); + this.annotation = new Vector(); } if (!this.annotation.contains(annotation)) { diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index 04f3588..38ae372 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -134,12 +134,13 @@ public interface SequenceI public char[] getSequence(int start, int end); /** - * create a new sequence object from start to end of this sequence + * create a new sequence object with a subsequence of this one but sharing the + * same dataset sequence * * @param start - * int index for start position + * int index for start position (base 0, inclusive) * @param end - * int index for end position + * int index for end position (base 0, exclusive) * * @return SequenceI * @note implementations may use getSequence to get the sequence data diff --git a/src/jalview/datamodel/UniprotEntry.java b/src/jalview/datamodel/UniprotEntry.java index 90702a2..4cf0f13 100755 --- a/src/jalview/datamodel/UniprotEntry.java +++ b/src/jalview/datamodel/UniprotEntry.java @@ -20,39 +20,44 @@ */ package jalview.datamodel; -import java.util.*; +import java.util.Vector; +/** + * Data model for an entry returned from a Uniprot query + * + * @see uniprot_mapping.xml + */ public class UniprotEntry { UniprotSequence sequence; - Vector name; + Vector name; - Vector accession; + Vector accession; - Vector feature; + Vector feature; - Vector dbrefs; + Vector dbrefs; UniprotProteinName protName; - public void setAccession(Vector items) + public void setAccession(Vector items) { accession = items; } - 
public void setFeature(Vector items) + public void setFeature(Vector items) { feature = items; } - public Vector getFeature() + public Vector getFeature() { return feature; } - public Vector getAccession() + public Vector getAccession() { return accession; } @@ -67,12 +72,12 @@ public class UniprotEntry return protName; } - public void setName(Vector na) + public void setName(Vector na) { name = na; } - public Vector getName() + public Vector getName() { return name; } @@ -87,12 +92,12 @@ public class UniprotEntry sequence = seq; } - public Vector getDbReference() + public Vector getDbReference() { return dbrefs; } - public void setDbReference(Vector dbref) + public void setDbReference(Vector dbref) { this.dbrefs = dbref; } diff --git a/src/jalview/datamodel/UniprotFile.java b/src/jalview/datamodel/UniprotFile.java index 44506a6..f0e38d8 100755 --- a/src/jalview/datamodel/UniprotFile.java +++ b/src/jalview/datamodel/UniprotFile.java @@ -20,18 +20,22 @@ */ package jalview.datamodel; -import java.util.*; +import java.util.Vector; +/** + * Data model of a retrieved Uniprot entry, as unmarshalled by Castor using a + * binding file (uniprot_mapping.xml) + */ public class UniprotFile { - Vector _items; + Vector _items; - public void setUniprotEntries(Vector items) + public void setUniprotEntries(Vector items) { _items = items; } - public Vector getUniprotEntries() + public Vector getUniprotEntries() { return _items; } diff --git a/src/jalview/datamodel/UniprotProteinName.java b/src/jalview/datamodel/UniprotProteinName.java index eb353aa..0a317e6 100755 --- a/src/jalview/datamodel/UniprotProteinName.java +++ b/src/jalview/datamodel/UniprotProteinName.java @@ -20,19 +20,26 @@ */ package jalview.datamodel; +import java.util.Vector; + +/** + * Data model for protein name returned from a Uniprot query + * + * Protein names are read from the Uniprot XML element + * uniprot/entry/protein/recommendedName/fullName + * + * @see uniprot_mapping.xml + */ public class 
UniprotProteinName { - /** - * internal content storage - */ - private java.util.Vector names; + private Vector names; - public void setName(java.util.Vector names) + public void setName(Vector names) { this.names = names; } - public java.util.Vector getName() + public Vector getName() { return names; } diff --git a/src/jalview/datamodel/UniprotSequence.java b/src/jalview/datamodel/UniprotSequence.java index 6ce751e..1150f1e 100755 --- a/src/jalview/datamodel/UniprotSequence.java +++ b/src/jalview/datamodel/UniprotSequence.java @@ -20,24 +20,34 @@ */ package jalview.datamodel; +/** + * Data model for the sequence returned by a Uniprot query + * + * @see uniprot_mapping.xml + */ public class UniprotSequence { + private String _content = ""; + /** - * internal content storage + * Sets the content string, omitting any space characters + * + * @param seq */ - private java.lang.String _content = ""; - public void setContent(String seq) { - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < seq.length(); i++) + if (seq != null) { - if (seq.charAt(i) != ' ') + StringBuilder sb = new StringBuilder(seq.length()); + for (int i = 0; i < seq.length(); i++) { - sb.append(seq.charAt(i)); + if (seq.charAt(i) != ' ') + { + sb.append(seq.charAt(i)); + } } + _content = sb.toString(); } - _content = sb.toString(); } public String getContent() diff --git a/src/jalview/datamodel/xdb/embl/BasePosition.java b/src/jalview/datamodel/xdb/embl/BasePosition.java index 070958a..3737adc 100644 --- a/src/jalview/datamodel/xdb/embl/BasePosition.java +++ b/src/jalview/datamodel/xdb/embl/BasePosition.java @@ -20,6 +20,12 @@ */ package jalview.datamodel.xdb.embl; +/** + * Data model for a feature/location/locationElement/basePosition read from an + * EMBL query reply + * + * @see embl_mapping.xml + */ public class BasePosition { String type; diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index fc57b27..3f890ba 100644 --- 
a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -20,6 +20,11 @@ */ package jalview.datamodel.xdb.embl; +import java.util.Enumeration; +import java.util.Hashtable; +import java.util.Iterator; +import java.util.Vector; + import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.FeatureProperties; @@ -28,11 +33,14 @@ import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; -import java.util.Enumeration; -import java.util.Hashtable; -import java.util.Iterator; -import java.util.Vector; - +/** + * Data model for one entry returned from an EMBL query, as marshalled by a + * Castor binding file + * + * For example: http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml + * + * @see embl_mapping.xml + */ public class EmblEntry { String accession; @@ -49,13 +57,11 @@ public class EmblEntry String lastUpdated; - Vector keywords; - - Vector refs; + Vector keywords; - Vector dbRefs; + Vector dbRefs; - Vector features; + Vector features; EmblSequence sequence; @@ -79,7 +85,7 @@ public class EmblEntry /** * @return the dbRefs */ - public Vector getDbRefs() + public Vector getDbRefs() { return dbRefs; } @@ -88,7 +94,7 @@ public class EmblEntry * @param dbRefs * the dbRefs to set */ - public void setDbRefs(Vector dbRefs) + public void setDbRefs(Vector dbRefs) { this.dbRefs = dbRefs; } @@ -113,7 +119,7 @@ public class EmblEntry /** * @return the features */ - public Vector getFeatures() + public Vector getFeatures() { return features; } @@ -122,7 +128,7 @@ public class EmblEntry * @param features * the features to set */ - public void setFeatures(Vector features) + public void setFeatures(Vector features) { this.features = features; } @@ -130,7 +136,7 @@ public class EmblEntry /** * @return the keywords */ - public Vector getKeywords() + public Vector getKeywords() { return keywords; } @@ -139,7 +145,7 @@ public class 
EmblEntry * @param keywords * the keywords to set */ - public void setKeywords(Vector keywords) + public void setKeywords(Vector keywords) { this.keywords = keywords; } @@ -162,23 +168,6 @@ public class EmblEntry } /** - * @return the refs - */ - public Vector getRefs() - { - return refs; - } - - /** - * @param refs - * the refs to set - */ - public void setRefs(Vector refs) - { - this.refs = refs; - } - - /** * @return the releaseCreated */ public String getRCreated() @@ -190,7 +179,7 @@ public class EmblEntry * @param releaseCreated * the releaseCreated to set */ - public void setRcreated(String releaseCreated) + public void setRCreated(String releaseCreated) { this.rCreated = releaseCreated; } diff --git a/src/jalview/datamodel/xdb/embl/EmblError.java b/src/jalview/datamodel/xdb/embl/EmblError.java index e781a7e..94de28f 100644 --- a/src/jalview/datamodel/xdb/embl/EmblError.java +++ b/src/jalview/datamodel/xdb/embl/EmblError.java @@ -20,6 +20,11 @@ */ package jalview.datamodel.xdb.embl; +/** + * Data model mapped from any <error> elements returned from an EMBL query + * + * @see embl_mapping.xml + */ public class EmblError { String accession; diff --git a/src/jalview/datamodel/xdb/embl/EmblFeature.java b/src/jalview/datamodel/xdb/embl/EmblFeature.java index 077788c..7f53eb3 100644 --- a/src/jalview/datamodel/xdb/embl/EmblFeature.java +++ b/src/jalview/datamodel/xdb/embl/EmblFeature.java @@ -22,20 +22,27 @@ package jalview.datamodel.xdb.embl; import java.util.Vector; +import jalview.datamodel.DBRefEntry; + +/** + * Data model for a <feature> element returned from an EMBL query reply + * + * @see embl_mapping.xml + */ public class EmblFeature { String name; - Vector dbRefs; + Vector dbRefs; - Vector qualifiers; + Vector qualifiers; - Vector locations; + Vector locations; /** * @return the dbRefs */ - public Vector getDbRefs() + public Vector getDbRefs() { return dbRefs; } @@ -44,7 +51,7 @@ public class EmblFeature * @param dbRefs * the dbRefs to set */ - public 
void setDbRefs(Vector dbRefs) + public void setDbRefs(Vector dbRefs) { this.dbRefs = dbRefs; } @@ -52,7 +59,7 @@ public class EmblFeature /** * @return the locations */ - public Vector getLocations() + public Vector getLocations() { return locations; } @@ -61,7 +68,7 @@ public class EmblFeature * @param locations * the locations to set */ - public void setLocations(Vector locations) + public void setLocations(Vector locations) { this.locations = locations; } @@ -86,7 +93,7 @@ public class EmblFeature /** * @return the qualifiers */ - public Vector getQualifiers() + public Vector getQualifiers() { return qualifiers; } @@ -95,7 +102,7 @@ public class EmblFeature * @param qualifiers * the qualifiers to set */ - public void setQualifiers(Vector qualifiers) + public void setQualifiers(Vector qualifiers) { this.qualifiers = qualifiers; } diff --git a/src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java b/src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java index 10c3634..134ce9e 100644 --- a/src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java +++ b/src/jalview/datamodel/xdb/embl/EmblFeatureLocElement.java @@ -20,6 +20,12 @@ */ package jalview.datamodel.xdb.embl; +/** + * Data model for a feature/location/locationElement read from an EMBL query + * reply + * + * @see embl_mapping.xml + */ public class EmblFeatureLocElement { String type; diff --git a/src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java b/src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java index 41ba739..eb0bee7 100644 --- a/src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java +++ b/src/jalview/datamodel/xdb/embl/EmblFeatureLocations.java @@ -20,12 +20,17 @@ */ package jalview.datamodel.xdb.embl; -import java.util.Enumeration; import java.util.Vector; +/** + * Data model for a <location> child element of a <feature> read + * from an EMBL query reply + * + * @see embl_mapping.xml + */ public class EmblFeatureLocations { - Vector locElements; + Vector locElements; String 
locationType; @@ -68,7 +73,7 @@ public class EmblFeatureLocations /** * @return the locElements */ - public Vector getLocElements() + public Vector getLocElements() { return locElements; } @@ -77,7 +82,7 @@ public class EmblFeatureLocations * @param locElements * the locElements to set */ - public void setLocElements(Vector locElements) + public void setLocElements(Vector locElements) { this.locElements = locElements; } @@ -110,12 +115,10 @@ public class EmblFeatureLocations { int sepos = 0; int[] se = new int[locElements.size() * 2]; - if (locationType.equalsIgnoreCase("single")) + if (locationType.equalsIgnoreCase("single")) // TODO: or "simple" ? { - for (Enumeration le = locElements.elements(); le.hasMoreElements();) + for (EmblFeatureLocElement loce : locElements) { - EmblFeatureLocElement loce = (EmblFeatureLocElement) le - .nextElement(); if (accession == null || loce.accession != null && accession.equals(loce.accession)) { @@ -130,10 +133,8 @@ public class EmblFeatureLocations } else if (locationType.equalsIgnoreCase("join")) { - for (Enumeration le = locElements.elements(); le.hasMoreElements();) + for (EmblFeatureLocElement loce : locElements) { - EmblFeatureLocElement loce = (EmblFeatureLocElement) le - .nextElement(); if (accession == null || loce.accession != null && accession.equals(loce.accession)) { @@ -150,13 +151,17 @@ public class EmblFeatureLocations else if (locationType != null) { if (jalview.bin.Cache.log != null) + { jalview.bin.Cache.log .error("EmbleFeatureLocations.getElementRanges cannot deal with locationType=='" + locationType + "'"); + } else + { System.err .println("EmbleFeatureLocations.getElementRanges cannot deal with locationType=='" + locationType + "'"); + } } // trim range if necessary. 
if (se != null && sepos != se.length) diff --git a/src/jalview/datamodel/xdb/embl/EmblFile.java b/src/jalview/datamodel/xdb/embl/EmblFile.java index 3ca3755..2129054 100644 --- a/src/jalview/datamodel/xdb/embl/EmblFile.java +++ b/src/jalview/datamodel/xdb/embl/EmblFile.java @@ -22,22 +22,31 @@ package jalview.datamodel.xdb.embl; import java.io.File; import java.io.FileReader; +import java.io.PrintWriter; import java.io.Reader; import java.util.Vector; import org.exolab.castor.mapping.Mapping; import org.exolab.castor.xml.Unmarshaller; +/** + * Data model for entries returned from an EMBL query, as marshalled by a Castor + * binding file + * + * For example: http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml + * + * @see embl_mapping.xml + */ public class EmblFile { - Vector entries; + Vector entries; - Vector errors; + Vector errors; /** * @return the entries */ - public Vector getEntries() + public Vector getEntries() { return entries; } @@ -46,7 +55,7 @@ public class EmblFile * @param entries * the entries to set */ - public void setEntries(Vector entries) + public void setEntries(Vector entries) { this.entries = entries; } @@ -54,7 +63,7 @@ public class EmblFile /** * @return the errors */ - public Vector getErrors() + public Vector getErrors() { return errors; } @@ -63,7 +72,7 @@ public class EmblFile * @param errors * the errors to set */ - public void setErrors(Vector errors) + public void setErrors(Vector errors) { this.errors = errors; } @@ -77,7 +86,9 @@ public class EmblFile public static EmblFile getEmblFile(File file) { if (file == null) + { return null; + } try { return EmblFile.getEmblFile(new FileReader(file)); @@ -96,6 +107,7 @@ public class EmblFile { // 1. 
Load the mapping information from the file Mapping map = new Mapping(record.getClass().getClassLoader()); + java.net.URL url = record.getClass().getResource("/embl_mapping.xml"); map.loadMapping(url); @@ -104,17 +116,19 @@ public class EmblFile try { // uncomment to DEBUG EMBLFile reading - if (((String) jalview.bin.Cache.getDefault( - jalview.bin.Cache.CASTORLOGLEVEL, "debug")) + if (jalview.bin.Cache.getDefault( + jalview.bin.Cache.CASTORLOGLEVEL, "debug") .equalsIgnoreCase("DEBUG")) + { unmar.setDebug(jalview.bin.Cache.log.isDebugEnabled()); + } } catch (Exception e) { } - ; unmar.setIgnoreExtraElements(true); + unmar.setIgnoreExtraAttributes(true); unmar.setMapping(map); - + unmar.setLogWriter(new PrintWriter(System.out)); record = (EmblFile) unmar.unmarshal(file); } catch (Exception e) { @@ -124,23 +138,4 @@ public class EmblFile return record; } - - public static void main(String args[]) - { - File mf = null; - if (args.length == 1) - { - mf = new File(args[0]); - } - if (!mf.exists()) - { - mf = new File( - "C:\\Documents and Settings\\JimP\\workspace-3.2\\Jalview Release\\schemas\\embleRecordV1.1.xml"); - } - EmblFile myfile = EmblFile.getEmblFile(mf); - if (myfile != null && myfile.entries != null - && myfile.entries.size() > 0) - System.out.println(myfile.entries.size() + " Records read. 
(" + mf - + ")"); - } } diff --git a/src/jalview/datamodel/xdb/embl/EmblSequence.java b/src/jalview/datamodel/xdb/embl/EmblSequence.java index 406ef2e..2a6fa84 100644 --- a/src/jalview/datamodel/xdb/embl/EmblSequence.java +++ b/src/jalview/datamodel/xdb/embl/EmblSequence.java @@ -20,6 +20,11 @@ */ package jalview.datamodel.xdb.embl; +/** + * Data model for the sequence extracted from an EMBL query reply + * + * @see embl_mapping.xml + */ public class EmblSequence { String version; diff --git a/src/jalview/datamodel/xdb/embl/Qualifier.java b/src/jalview/datamodel/xdb/embl/Qualifier.java index 9ab7f38..851dd48 100644 --- a/src/jalview/datamodel/xdb/embl/Qualifier.java +++ b/src/jalview/datamodel/xdb/embl/Qualifier.java @@ -20,6 +20,12 @@ */ package jalview.datamodel.xdb.embl; +/** + * Data model for a <qualifier> child element of a <feature> read + * from an EMBL query reply + * + * @see embl_mapping.xml + */ public class Qualifier { String name; @@ -64,6 +70,7 @@ public class Qualifier public void addEvidence(String qevidence) { + // TODO - not used? can remove? if (evidence == null) { evidence = new String[1]; @@ -79,6 +86,7 @@ public class Qualifier public void addValues(String value) { + // TODO - not used? can remove? 
if (values == null) { values = new String[1]; diff --git a/src/jalview/ws/dbsources/EmblXmlSource.java b/src/jalview/ws/dbsources/EmblXmlSource.java index 58ee42d..92e863a 100644 --- a/src/jalview/ws/dbsources/EmblXmlSource.java +++ b/src/jalview/ws/dbsources/EmblXmlSource.java @@ -20,23 +20,23 @@ */ package jalview.ws.dbsources; +import java.io.File; + import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceI; import jalview.datamodel.xdb.embl.EmblEntry; +import jalview.datamodel.xdb.embl.EmblFile; import jalview.util.MessageManager; import jalview.ws.ebi.EBIFetchClient; -import java.io.File; -import java.util.Iterator; - public abstract class EmblXmlSource extends EbiFileRetrievedProxy { /** * Last properly parsed embl file. */ - public jalview.datamodel.xdb.embl.EmblFile efile = null; + public EmblFile efile = null; public EmblXmlSource() { @@ -94,7 +94,7 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy file = reply.getAbsolutePath(); if (reply.length() > 25) { - efile = jalview.datamodel.xdb.embl.EmblFile.getEmblFile(reply); + efile = EmblFile.getEmblFile(reply); } else { @@ -103,19 +103,10 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy } if (efile != null) { - for (Iterator i = efile.getEntries().iterator(); i.hasNext();) + for (EmblEntry entry : efile.getEntries()) { - EmblEntry entry = (EmblEntry) i.next(); - SequenceI[] seqparts = entry.getSequences(false, true, emprefx); // TODO: - // use - // !fetchNa,!fetchPeptide - // here - // instead - // - - // see - // todo - // in - // emblEntry + SequenceI[] seqparts = entry.getSequences(false, true, emprefx); + // TODO: use !fetchNa,!fetchPeptide here instead - see todo in EmblEntry if (seqparts != null) { SequenceI[] newseqs = null; @@ -136,8 +127,8 @@ public abstract class EmblXmlSource extends EbiFileRetrievedProxy } for (int j = 0; j < seqparts.length; si++, j++) { - newseqs[si] = seqparts[j].deriveSequence(); // place 
DBReferences on - // dataset and refer + newseqs[si] = seqparts[j].deriveSequence(); + // place DBReferences on dataset and refer } seqs = newseqs; diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index f0e5de0..6a871ee 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@ -22,7 +22,7 @@ package jalview.ws.dbsources; import java.io.File; import java.io.FileReader; -import java.util.Enumeration; +import java.io.Reader; import java.util.Vector; import org.exolab.castor.xml.Unmarshaller; @@ -48,6 +48,16 @@ import jalview.ws.seqfetcher.DbSourceProxyImpl; */ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy { + + private static final String BAR_DELIMITER = "|"; + + private static final String NEWLINE = "\n"; + + private static org.exolab.castor.mapping.Mapping map; + + /** + * Constructor + */ public Uniprot() { super(); @@ -96,11 +106,15 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy return "0"; // we really don't know what version we're on. 
} - private EBIFetchClient ebi = null; - - private static org.exolab.castor.mapping.Mapping map; - - public Vector getUniprotEntries(File file) + /** + * Reads a file containing the reply to the EBI Fetch Uniprot data query, + * unmarshals it to a UniprotFile object, and returns the list of UniprotEntry + * data models (mapped from <entry> elements) + * + * @param fileReader + * @return + */ + public Vector getUniprotEntries(Reader fileReader) { UniprotFile uni = new UniprotFile(); try @@ -118,9 +132,9 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy Unmarshaller unmar = new Unmarshaller(uni); unmar.setIgnoreExtraElements(true); unmar.setMapping(map); - if (file != null) + if (fileReader != null) { - uni = (UniprotFile) unmar.unmarshal(new FileReader(file)); + uni = (UniprotFile) unmar.unmarshal(fileReader); } } catch (Exception e) { @@ -143,48 +157,28 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy queries = queries.toUpperCase().replaceAll( "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", ""); Alignment al = null; - ebi = new EBIFetchClient(); - StringBuffer result = new StringBuffer(); + EBIFetchClient ebi = new EBIFetchClient(); // uniprotxml parameter required since december 2007 // uniprotkb dbname changed introduced december 2008 File file = ebi.fetchDataAsFile("uniprotkb:" + queries, "uniprotxml", null); - Vector entries = getUniprotEntries(file); + Vector entries = getUniprotEntries(new FileReader(file)); if (entries != null) { + /* + * If Castor binding included sequence@length, we could guesstimate the + * size of buffer to hold the alignment + */ + StringBuffer result = new StringBuffer(128); // First, make the new sequences - Enumeration en = entries.elements(); - while (en.hasMoreElements()) + for (UniprotEntry entry : entries) { - UniprotEntry entry = (UniprotEntry) en.nextElement(); - - StringBuffer name = new StringBuffer(">UniProt/Swiss-Prot"); - Enumeration en2 = 
entry.getAccession().elements(); - while (en2.hasMoreElements()) - { - name.append("|"); - name.append(en2.nextElement()); - } - en2 = entry.getName().elements(); - while (en2.hasMoreElements()) - { - name.append("|"); - name.append(en2.nextElement()); - } - - if (entry.getProtein() != null - && entry.getProtein().getName() != null) - { - for (int nm = 0, nmSize = entry.getProtein().getName().size(); nm < nmSize; nm++) - { - name.append(" " + entry.getProtein().getName().elementAt(nm)); - } - } - - result.append(name + "\n" - + entry.getUniprotSequence().getContent() + "\n"); + StringBuilder name = constructSequenceFastaHeader(entry); + result.append(name).append(NEWLINE) + .append(entry.getUniprotSequence().getContent()) + .append(NEWLINE); } // Then read in the features and apply them to the dataset @@ -209,63 +203,95 @@ public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy } /** + * Construct a Fasta-format sequence header by concatenating the source, + * accession id(s) and name(s), delimited by '|', plus any protein names, now + * with space rather than bar delimiter + * + * @param entry + * @return + */ + public static StringBuilder constructSequenceFastaHeader( + UniprotEntry entry) + { + StringBuilder name = new StringBuilder(32); + name.append(">UniProt/Swiss-Prot"); + for (String accessionId : entry.getAccession()) + { + name.append(BAR_DELIMITER); + name.append(accessionId); + } + for (String n : entry.getName()) + { + name.append(BAR_DELIMITER); + name.append(n); + } + + if (entry.getProtein() != null + && entry.getProtein().getName() != null) + { + for (String nm : entry.getProtein().getName()) + { + name.append(" ").append(nm); + } + } + return name; + } + + /** * add an ordered set of UniprotEntry objects to an ordered set of seuqences. * * @param al * - a sequence of n sequences * @param entries - * a seuqence of n uniprot entries to be analysed. + * a list of n uniprot entries to be analysed. 
*/ - public void addUniprotXrefs(Alignment al, Vector entries) + public void addUniprotXrefs(Alignment al, Vector entries) { + final String dbVersion = getDbVersion(); + for (int i = 0; i < entries.size(); i++) { - UniprotEntry entry = (UniprotEntry) entries.elementAt(i); - Enumeration e = entry.getDbReference().elements(); - Vector onlyPdbEntries = new Vector(); - Vector dbxrefs = new Vector(); - while (e.hasMoreElements()) + UniprotEntry entry = entries.elementAt(i); + Vector onlyPdbEntries = new Vector(); + Vector dbxrefs = new Vector(); + + for (PDBEntry pdb : entry.getDbReference()) { - PDBEntry pdb = (PDBEntry) e.nextElement(); DBRefEntry dbr = new DBRefEntry(); dbr.setSource(pdb.getType()); dbr.setAccessionId(pdb.getId()); - dbr.setVersion(DBRefSource.UNIPROT + ":" + getDbVersion()); + dbr.setVersion(DBRefSource.UNIPROT + ":" + dbVersion); dbxrefs.addElement(dbr); - if (!pdb.getType().equals("PDB")) + if ("PDB".equals(pdb.getType())) { - continue; + onlyPdbEntries.addElement(pdb); } - - onlyPdbEntries.addElement(pdb); } + SequenceI sq = al.getSequenceAt(i); while (sq.getDatasetSequence() != null) { sq = sq.getDatasetSequence(); } - Enumeration en2 = entry.getAccession().elements(); - while (en2.hasMoreElements()) + for (String accessionId : entry.getAccession()) { - // we always add as uniprot if we retrieved from uniprot or uniprot name - sq.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, getDbVersion(), en2 - .nextElement().toString())); + /* + * add as uniprot whether retrieved from uniprot or uniprot_name + */ + sq.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, dbVersion, + accessionId)); } - en2 = dbxrefs.elements(); - while (en2.hasMoreElements()) - { - // we always add as uniprot if we retrieved from uniprot or uniprot name - sq.addDBRef((DBRefEntry) en2.nextElement()); + for (DBRefEntry dbRef : dbxrefs) + { + sq.addDBRef(dbRef); } sq.setPDBId(onlyPdbEntries); if (entry.getFeature() != null) { - e = entry.getFeature().elements(); - while 
(e.hasMoreElements()) + for (SequenceFeature sf : entry.getFeature()) { - SequenceFeature sf = (SequenceFeature) e.nextElement(); sf.setFeatureGroup("Uniprot"); sq.addSequenceFeature(sf); } diff --git a/src/jalview/ws/ebi/EBIFetchClient.java b/src/jalview/ws/ebi/EBIFetchClient.java index cb61cd4..a785899 100644 --- a/src/jalview/ws/ebi/EBIFetchClient.java +++ b/src/jalview/ws/ebi/EBIFetchClient.java @@ -20,17 +20,19 @@ */ package jalview.ws.ebi; -import jalview.util.MessageManager; - import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; +import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.util.ArrayList; +import java.util.List; import java.util.StringTokenizer; +import jalview.util.MessageManager; + /** * DOCUMENT ME! * @@ -83,6 +85,18 @@ public class EBIFetchClient throw new Error(MessageManager.getString("error.not_yet_implemented")); } + /** + * Send an HTTP fetch request to EBI and save the reply in a temporary file. + * + * @param ids + * the query formatted as db:query1;query2;query3 + * @param f + * the format wanted + * @param s + * - unused parameter + * @return the file holding the response + * @throws OutOfMemoryError + */ public File fetchDataAsFile(String ids, String f, String s) throws OutOfMemoryError { @@ -111,7 +125,7 @@ public class EBIFetchClient * @param f * raw/xml * @param s - * ? + * not used - remove? * * @return Raw string array result of query set */ @@ -180,7 +194,7 @@ public class EBIFetchClient + db.toLowerCase() + "/" + ids.toLowerCase() + (f != null ? 
"/" + f : "")); - BufferedInputStream is = new BufferedInputStream(rcall.openStream()); + InputStream is = new BufferedInputStream(rcall.openStream()); if (outFile != null) { FileOutputStream fio = new FileOutputStream(outFile); @@ -197,7 +211,7 @@ public class EBIFetchClient { BufferedReader br = new BufferedReader(new InputStreamReader(is)); String rtn; - ArrayList arl = new ArrayList(); + List arl = new ArrayList(); while ((rtn = br.readLine()) != null) { arl.add(rtn); diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 9ef1b9a..6c06955 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -22,6 +22,7 @@ package jalview.analysis; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; @@ -89,13 +90,16 @@ public class AlignmentUtilsTests "GGGTCAGGCAGT\n"; // @formatter:on - public static Sequence ts=new Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD"); + // public static Sequence ts=new + // Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD"); + public static Sequence ts = new Sequence("short", + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"); @Test - public void testExpandFlanks() + public void testExpandContext() { AlignmentI al = new Alignment(new Sequence[] {}); - for (int i=4;i<14;i+=3) + for (int i = 4; i < 14; i += 2) { SequenceI s1=ts.deriveSequence().getSubSequence(i, i+7); al.addSequence(s1); @@ -103,18 +107,135 @@ public class AlignmentUtilsTests System.out.println(new AppletFormatAdapter().formatSequences("Clustal", al, true)); for (int flnk=-1;flnk<25; flnk++) { - AlignmentI exp; - System.out.println("\nFlank size: "+flnk); - System.out.println(new AppletFormatAdapter().formatSequences("Clustal", exp=AlignmentUtils.expandContext(al, flnk), true)); - if 
(flnk==-1) { - for (SequenceI sq:exp.getSequences()) + AlignmentI exp = AlignmentUtils.expandContext(al, flnk); + System.out.println("\nFlank size: " + flnk); + System.out.println(new AppletFormatAdapter().formatSequences( + "Clustal", exp, true)); + if (flnk == -1) { + /* + * Full expansion to complete sequences + */ + for (SequenceI sq : exp.getSequences()) + { String ung = sq.getSequenceAsString().replaceAll("-+", ""); - assertTrue("Flanking sequence not the same as original dataset sequence.\n"+ung+"\n"+sq.getDatasetSequence().getSequenceAsString(),ung.equalsIgnoreCase(sq.getDatasetSequence().getSequenceAsString())); + final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n" + + ung + + "\n" + + sq.getDatasetSequence().getSequenceAsString(); + assertTrue(errorMsg, ung.equalsIgnoreCase(sq.getDatasetSequence() + .getSequenceAsString())); + } } + else if (flnk == 24) + { + /* + * Last sequence is fully expanded, others have leading gaps to match + */ + assertTrue(exp.getSequenceAt(4).getSequenceAsString() + .startsWith("abc")); + assertTrue(exp.getSequenceAt(3).getSequenceAsString() + .startsWith("--abc")); + assertTrue(exp.getSequenceAt(2).getSequenceAsString() + .startsWith("----abc")); + assertTrue(exp.getSequenceAt(1).getSequenceAsString() + .startsWith("------abc")); + assertTrue(exp.getSequenceAt(0).getSequenceAsString() + .startsWith("--------abc")); } } - } + } + + /** + * Test that annotations are correctly adjusted by expandContext + */ + @Test + public void testExpandContext_annotation() + { + AlignmentI al = new Alignment(new Sequence[] + {}); + SequenceI ds = new Sequence("Seq1", "ABCDEFGHI"); + // subsequence DEF: + SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6); + al.addSequence(seq1); + + /* + * Annotate DEF with 4/5/6 respectively + */ + Annotation[] anns = new Annotation[] + { new Annotation(4), new Annotation(5), new Annotation(6) }; + AlignmentAnnotation ann = new AlignmentAnnotation("SS", + "secondary 
structure", anns); + seq1.addAlignmentAnnotation(ann); + + /* + * The annotations array should match aligned positions + */ + assertEquals(3, ann.annotations.length); + assertEquals(4, ann.annotations[0].value, 0.001); + assertEquals(5, ann.annotations[1].value, 0.001); + assertEquals(6, ann.annotations[2].value, 0.001); + + /* + * Check annotation to sequence position mappings before expanding the + * sequence; these are set up in Sequence.addAlignmentAnnotation -> + * Annotation.setSequenceRef -> createSequenceMappings + */ + assertNull(ann.getAnnotationForPosition(1)); + assertNull(ann.getAnnotationForPosition(2)); + assertNull(ann.getAnnotationForPosition(3)); + assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001); + assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001); + assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001); + assertNull(ann.getAnnotationForPosition(7)); + assertNull(ann.getAnnotationForPosition(8)); + assertNull(ann.getAnnotationForPosition(9)); + + /* + * Expand the subsequence to the full sequence abcDEFghi + */ + AlignmentI expanded = AlignmentUtils.expandContext(al, -1); + // FIXME expandContext adds an unnecessary gap; need tests to cover all + // cases for which 'maxOffset' is computed + assertEquals("-abcDEFghi", expanded.getSequenceAt(0) + .getSequenceAsString()); + + /* + * Confirm the alignment and sequence have the same SS annotation, + * referencing the expanded sequence + */ + ann = expanded.getSequenceAt(0).getAnnotation()[0]; + assertSame(ann, expanded.getAlignmentAnnotation()[0]); + assertSame(expanded.getSequenceAt(0), ann.sequenceRef); + + /* + * The annotations array should have null values except for annotated + * positions + */ + assertNull(ann.annotations[0]); + assertNull(ann.annotations[1]); + assertNull(ann.annotations[2]); + assertNull(ann.annotations[3]); + assertEquals(4, ann.annotations[4].value, 0.001); + assertEquals(5, ann.annotations[5].value, 0.001); + assertEquals(6, 
ann.annotations[6].value, 0.001); + assertNull(ann.annotations[7]); + assertNull(ann.annotations[8]); + assertNull(ann.annotations[9]); + + /* + * sequence position mappings should be unchanged + */ + assertNull(ann.getAnnotationForPosition(1)); + assertNull(ann.getAnnotationForPosition(2)); + assertNull(ann.getAnnotationForPosition(3)); + assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001); + assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001); + assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001); + assertNull(ann.getAnnotationForPosition(7)); + assertNull(ann.getAnnotationForPosition(8)); + assertNull(ann.getAnnotationForPosition(9)); + } /** * Test method that returns a map of lists of sequences by sequence name. diff --git a/test/jalview/datamodel/AlignmentAnnotationTests.java b/test/jalview/datamodel/AlignmentAnnotationTests.java index f23b3d2..7c1aa81 100644 --- a/test/jalview/datamodel/AlignmentAnnotationTests.java +++ b/test/jalview/datamodel/AlignmentAnnotationTests.java @@ -1,11 +1,13 @@ package jalview.datamodel; import static org.junit.Assert.assertEquals; -import jalview.analysis.AlignSeq; -import jalview.io.AppletFormatAdapter; +import static org.junit.Assert.assertNull; import org.junit.Test; +import jalview.analysis.AlignSeq; +import jalview.io.AppletFormatAdapter; + public class AlignmentAnnotationTests { @Test @@ -153,7 +155,48 @@ public class AlignmentAnnotationTests : "Out of range"); assertEquals("Position " + p + " " + alm1 + " " + alm2, alm1, alm2); } - // new jalview.io.FormatAdapter().formatSequences("STOCKHOLM", n) } + @Test + public void testAdjustForAlignment() + { + SequenceI seq = new Sequence("TestSeq", "ABCDEFG"); + seq.createDatasetSequence(); + + /* + * Annotate positions 3/4/5 (CDE) with values 1/2/3 + */ + Annotation[] anns = new Annotation[] + { null, null, new Annotation(1), new Annotation(2), new Annotation(3) }; + AlignmentAnnotation ann = new AlignmentAnnotation("SS", + "secondary structure", 
anns); + seq.addAlignmentAnnotation(ann); + + /* + * Check annotation map before modifying aligned sequence + */ + assertNull(ann.getAnnotationForPosition(1)); + assertNull(ann.getAnnotationForPosition(2)); + assertNull(ann.getAnnotationForPosition(6)); + assertNull(ann.getAnnotationForPosition(7)); + assertEquals(1, ann.getAnnotationForPosition(3).value, 0.001d); + assertEquals(2, ann.getAnnotationForPosition(4).value, 0.001d); + assertEquals(3, ann.getAnnotationForPosition(5).value, 0.001d); + + /* + * Trim the displayed sequence to BCD and adjust annotations + */ + seq.setSequence("BCD"); + seq.setStart(2); + seq.setEnd(4); + ann.adjustForAlignment(); + + /* + * Should now have annotations for aligned positions 2, 3 (CD) only + */ + assertEquals(3, ann.annotations.length); + assertNull(ann.annotations[0]); + assertEquals(1, ann.annotations[1].value, 0.001); + assertEquals(2, ann.annotations[2].value, 0.001); + } } diff --git a/test/jalview/datamodel/SequenceTest.java b/test/jalview/datamodel/SequenceTest.java index ea23bfe..5e73bbc 100644 --- a/test/jalview/datamodel/SequenceTest.java +++ b/test/jalview/datamodel/SequenceTest.java @@ -310,4 +310,90 @@ public class SequenceTest seq.getDatasetSequence().setDatasetSequence(seq); // loop! assertNull(seq.getSequenceFeatures()); } + + /** + * Test the method that returns an array, indexed by sequence position, whose + * entries are the residue positions at the sequence position (or to the right + * if a gap) + */ + @Test + public void testFindPositionMap() + { + /* + * Note: Javadoc for findPosition says it returns the residue position to + * the left of a gapped position; in fact it returns the position to the + * right. Also it returns a non-existent residue position for a gap beyond + * the sequence.
+ */ + Sequence seq = new Sequence("TestSeq", "AB.C-D E."); + int[] map = seq.findPositionMap(); + assertEquals(Arrays.toString(new int[] + { 1, 2, 3, 3, 4, 4, 5, 5, 6 }), Arrays.toString(map)); + } + + /** + * Test for getSubsequence + */ + @Test + public void testGetSubsequence() + { + SequenceI seq = new Sequence("TestSeq", "ABCDEFG"); + seq.createDatasetSequence(); + + // positions are base 0, end position is exclusive + SequenceI subseq = seq.getSubSequence(2, 4); + + assertEquals("CD", subseq.getSequenceAsString()); + // start/end are base 1 positions + assertEquals(3, subseq.getStart()); + assertEquals(4, subseq.getEnd()); + // subsequence shares the full dataset sequence + assertSame(seq.getDatasetSequence(), subseq.getDatasetSequence()); + } + + /** + * Test for deriveSequence applied to a sequence with a dataset + */ + @Test + public void testDeriveSequence_existingDataset() + { + SequenceI seq = new Sequence("Seq1", "CD"); + seq.setDatasetSequence(new Sequence("Seq1", "ABCDEF")); + seq.setStart(3); + seq.setEnd(4); + SequenceI derived = seq.deriveSequence(); + assertEquals("CD", derived.getSequenceAsString()); + assertSame(seq.getDatasetSequence(), derived.getDatasetSequence()); + } + + /** + * Test for deriveSequence applied to an ungapped sequence with no dataset + */ + @Test + public void testDeriveSequence_noDatasetUngapped() + { + SequenceI seq = new Sequence("Seq1", "ABCDEF"); + assertEquals(1, seq.getStart()); + assertEquals(6, seq.getEnd()); + SequenceI derived = seq.deriveSequence(); + assertEquals("ABCDEF", derived.getSequenceAsString()); + assertEquals("ABCDEF", derived.getDatasetSequence() + .getSequenceAsString()); + } + + /** + * Test for deriveSequence applied to a gapped sequence with no dataset + */ + @Test + public void testDeriveSequence_noDatasetGapped() + { + SequenceI seq = new Sequence("Seq1", "AB-C.D EF"); + assertEquals(1, seq.getStart()); + assertEquals(6, seq.getEnd()); + assertNull(seq.getDatasetSequence()); + SequenceI 
derived = seq.deriveSequence(); + assertEquals("AB-C.D EF", derived.getSequenceAsString()); + assertEquals("ABCDEF", derived.getDatasetSequence() + .getSequenceAsString()); + } } diff --git a/test/jalview/datamodel/xdb/embl/EmblFileTest.java b/test/jalview/datamodel/xdb/embl/EmblFileTest.java new file mode 100644 index 0000000..e89245c --- /dev/null +++ b/test/jalview/datamodel/xdb/embl/EmblFileTest.java @@ -0,0 +1,127 @@ +package jalview.datamodel.xdb.embl; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.StringReader; +import java.util.Vector; + +import org.junit.Test; + +import jalview.datamodel.DBRefEntry; + +public class EmblFileTest +{ + // adapted from http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml + private static final String TESTDATA = + "" + + "" + + "" + + "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)" + + "L-lactate dehydrogenasechutney" + + "" + + "" + + "L-lactate dehydrogenase A-chainpickle" + + "MSLKDHLIHNKeith" + + "" + + "" + + "601058" + + "" + + "GTGACG"; + + @Test + public void testGetEmblFile() + { + Vector entries = EmblFile.getEmblFile( + new StringReader(TESTDATA)).getEntries(); + assertEquals(1, entries.size()); + EmblEntry entry = entries.get(0); + + assertEquals("X53828", entry.getAccession()); + assertEquals( + "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)", + entry.getDesc()); + assertEquals("2005-04-18", entry.getLastUpdated()); + + /* + * FIXME these assertions fail - values are null - why?? Adding or removing + * attributes in the test XML modifies behaviour. eg. inserting an attribute + * _before_ lastUpdated results in a null value in this field. 
+ */ + // assertEquals("25", entry.getRCreated()); + // assertEquals("83", entry.getRLastUpdated()); + + assertEquals(2, entry.getKeywords().size()); + assertEquals("L-lactate dehydrogenase", entry.getKeywords().get(0)); + assertEquals("chutney", entry.getKeywords().get(1)); + + /* + * dbrefs + */ + assertEquals(1, entry.getDbRefs().size()); + DBRefEntry dbref = entry.getDbRefs().get(0); + assertEquals("EuropePMC", dbref.getSource()); + assertEquals("PMC1460223", dbref.getAccessionId()); + assertEquals("9649548", dbref.getVersion()); + + /* + * sequence features + */ + assertEquals(1, entry.getFeatures().size()); + EmblFeature ef = entry.getFeatures().get(0); + assertEquals("CDS", ef.getName()); + assertEquals(2, ef.getDbRefs().size()); + dbref = ef.getDbRefs().get(0); + assertEquals("GOA", dbref.getSource()); + assertEquals("P00340", dbref.getAccessionId()); + assertEquals("2.1", dbref.getVersion()); + dbref = ef.getDbRefs().get(1); + assertEquals("InterPro", dbref.getSource()); + assertEquals("IPR001236", dbref.getAccessionId()); + assertEquals("", dbref.getVersion()); + assertEquals(2, ef.getQualifiers().size()); + + // feature qualifiers + Qualifier q = ef.getQualifiers().get(0); + assertEquals("note", q.getName()); + assertEquals(2, q.getValues().length); + assertEquals("L-lactate dehydrogenase A-chain", q.getValues()[0]); + assertEquals("pickle", q.getValues()[1]); + assertNull(q.getEvidence()); + q = ef.getQualifiers().get(1); + assertEquals("translation", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("MSLKDHLIHN", q.getValues()[0]); + assertEquals(1, q.getEvidence().length); + assertEquals("Keith", q.getEvidence()[0]); + + // feature locations + assertEquals(1, ef.getLocations().size()); + EmblFeatureLocations fl = ef.getLocations().get(0); + assertEquals("single", fl.getLocationType()); + assertTrue(fl.isLocationComplement()); + assertEquals(1, fl.getLocElements().size()); + EmblFeatureLocElement le = fl.getLocElements().get(0); + 
assertEquals("range", le.getType()); + assertEquals("X53828", le.getAccession()); + assertEquals("1", le.getVersion()); + assertFalse(le.isComplement()); + assertEquals(2, le.getBasePositions().length); + BasePosition bp = le.getBasePositions()[0]; + assertEquals("simple", bp.getType()); + assertEquals("60", bp.getPos()); + bp = le.getBasePositions()[1]; + assertEquals("join", bp.getType()); + assertEquals("1058", bp.getPos()); + + /* + * Sequence + */ + EmblSequence seq = entry.getSequence(); + assertEquals("mRNA", seq.getType()); + assertEquals("2", seq.getVersion()); + assertEquals("GTGACG", seq.getSequence()); + } +} diff --git a/test/jalview/ws/dbsources/UniprotTest.java b/test/jalview/ws/dbsources/UniprotTest.java new file mode 100644 index 0000000..7325527 --- /dev/null +++ b/test/jalview/ws/dbsources/UniprotTest.java @@ -0,0 +1,124 @@ +package jalview.ws.dbsources; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import java.io.Reader; +import java.io.StringReader; +import java.util.Vector; + +import org.junit.Test; + +import jalview.datamodel.PDBEntry; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.UniprotEntry; + +public class UniprotTest +{ + // adapted from http://www.uniprot.org/uniprot/A9CKP4.xml + private static final String UNIPROT_XML = "" + + "" + + "" + + "A9CKP4" + + "A9CKP5" + + "A9CKP4_AGRT5" + + "A9CKP4_AGRT6" + + "Mitogen-activated protein kinase 13Henry" + + "" + + "" + + "" + + "" + + "" + + "MHAPL VSKDL" + + ""; + + /** + * Test the method that unmarshals XML to a Uniprot model + */ + @Test + public void testGetUniprotEntries() + { + Uniprot u = new Uniprot(); + Reader reader = new StringReader(UNIPROT_XML); + Vector entries = u.getUniprotEntries(reader); + assertEquals(1, entries.size()); + UniprotEntry entry = entries.get(0); + assertEquals(2, entry.getName().size()); + assertEquals("A9CKP4_AGRT5", entry.getName().get(0)); + assertEquals("A9CKP4_AGRT6", 
entry.getName().get(1)); + assertEquals(2, entry.getAccession().size()); + assertEquals("A9CKP4", entry.getAccession().get(0)); + assertEquals("A9CKP5", entry.getAccession().get(1)); + + /* + * UniprotSequence drops any space characters + */ + assertEquals("MHAPLVSKDL", entry.getUniprotSequence() + .getContent()); + + assertEquals(2, entry.getProtein().getName().size()); + assertEquals("Mitogen-activated protein kinase 13", entry.getProtein() + .getName().get(0)); + assertEquals("Henry", entry.getProtein().getName().get(1)); + + /* + * Check sequence features + */ + Vector features = entry.getFeature(); + assertEquals(3, features.size()); + SequenceFeature sf = features.get(0); + assertEquals("signal peptide", sf.getType()); + assertNull(sf.getDescription()); + assertNull(sf.getStatus()); + assertEquals(1, sf.getPosition()); // wrong - Castor bug?? + assertEquals(1, sf.getBegin()); + assertEquals(18, sf.getEnd()); + sf = features.get(1); + assertEquals("propeptide", sf.getType()); + assertEquals("Activation peptide", sf.getDescription()); + assertEquals(19, sf.getPosition()); // wrong - Castor bug?? + assertEquals(19, sf.getBegin()); + assertEquals(20, sf.getEnd()); + sf = features.get(2); + assertEquals("chain", sf.getType()); + assertEquals("Granzyme B", sf.getDescription()); + assertEquals(21, sf.getPosition()); // wrong - Castor bug?? 
+ assertEquals(21, sf.getBegin()); + assertEquals(247, sf.getEnd()); + + /* + * Check cross-references + */ + Vector xrefs = entry.getDbReference(); + assertEquals(2, xrefs.size()); + + PDBEntry xref = xrefs.get(0); + assertEquals("2FSQ", xref.getId()); + assertEquals("PDB", xref.getType()); + assertEquals(2, xref.getProperty().size()); + assertEquals("X-ray", xref.getProperty().get("method")); + assertEquals("1.40", xref.getProperty().get("resolution")); + + xref = xrefs.get(1); + assertEquals("2FSR", xref.getId()); + assertEquals("PDBsum", xref.getType()); + assertNull(xref.getProperty()); + } + + /** + * Test the method that formats the sequence name in Fasta style + */ + @Test + public void testConstructSequenceFastaHeader() + { + Uniprot u = new Uniprot(); + Reader reader = new StringReader(UNIPROT_XML); + Vector entries = u.getUniprotEntries(reader); + UniprotEntry entry = entries.get(0); + + // source + accession ids + names + protein names + String expectedName = ">UniProt/Swiss-Prot|A9CKP4|A9CKP5|A9CKP4_AGRT5|A9CKP4_AGRT6 Mitogen-activated protein kinase 13 Henry"; + assertEquals(expectedName, Uniprot.constructSequenceFastaHeader(entry) + .toString()); + } +} -- 1.7.10.2