</field>
</class>
<class name="jalview.datamodel.xdb.embl.EmblEntry">
- <map-to xml="entry"/>
<field name="accession" type="string">
<bind-xml location="accession" node="attribute"/>
</field>
+ <!-- May 2015 changed from last-updated to match xml -->
<field name="lastUpdated" type="string">
- <bind-xml location="last-updated" node="attribute"/>
+ <bind-xml location="lastUpdated" node="attribute"/>
</field>
<field name="version" type="string">
<bind-xml location="version" node="attribute"/>
</field>
-
<field name="rCreated" type="string">
<bind-xml location="releaseCreated" node="attribute"/>
</field>
<field name="desc" type="string">
<bind-xml name="description" node="element"/>
</field>
- <field name="Keywords" type="string" collection="vector">
+ <field name="keywords" type="string" collection="vector">
<bind-xml name="keyword" node="element"/>
</field>
<field name="features" type="jalview.datamodel.xdb.embl.EmblFeature" collection="vector">
<bind-xml name="dbreference" />
</field>
<field name="sequence" type="jalview.datamodel.xdb.embl.EmblSequence">
- <bind-xml name="sequence"/> <!-- location="sequence" node="element"/ -->
+ <bind-xml name="sequence"/>
</field>
</class>
<class name="jalview.datamodel.xdb.embl.EmblSequence">
- <map-to xml="sequence"/>
<field name="type" type="string">
<bind-xml name="type" node="attribute" location="type"/>
</field>
</field>
</class>
<class name="jalview.datamodel.xdb.embl.EmblFeature" verify-constructable="false">
- <map-to xml="feature"/>
<field name="name" type="string">
<bind-xml name="name" node="attribute"/>
</field>
</field>
</class>
<class name="jalview.datamodel.DBRefEntry" verify-constructable="false">
- <field name="accessionId" type="java.lang.String"><!-- set-method="setAccessionId" get-method="getAccessionId"> -->
+ <field name="accessionId" type="java.lang.String">
<bind-xml name="primary" node="attribute"/>
</field>
- <field name="source" type="java.lang.String"> <!--set-method="setSource" get-method="getSource"> -->
+ <field name="source" type="java.lang.String">
<bind-xml name="db" node="attribute"/>
</field>
- <field name="version" type="string"><!-- set-method="setVersion" get-method="getVersion"> -->
+ <field name="version" type="string">
<bind-xml name="secondary" node="attribute"/>
</field>
</class>
<class name="jalview.datamodel.xdb.embl.Qualifier" verify-constructable="false">
- <map-to xml="Qualifier"/>
<field name="name">
<bind-xml name="name" node="attribute"/>
</field>
</field>
</class>
<class name="jalview.datamodel.xdb.embl.EmblFeatureLocations">
- <map-to xml="location"/>
<field name="locationType" type="string">
<bind-xml name="type" node="attribute"/>
</field>
</field>
</class>
<class name="jalview.datamodel.xdb.embl.EmblFeatureLocElement">
- <map-to xml="locationElement"/>
<field name="type" type="string">
<bind-xml name="type" node="attribute"/>
</field>
</field>
</class>
<class name="jalview.datamodel.xdb.embl.BasePosition">
- <map-to xml="basePosition"/>
<field name="type">
<bind-xml name="type" node="attribute"/>
</field>
for (SequenceI s : core.getSequences())
{
SequenceI newSeq = s.deriveSequence();
- if (newSeq.getStart() > maxoffset
+ final int newSeqStart = newSeq.getStart() - 1;
+ if (newSeqStart > maxoffset
&& newSeq.getDatasetSequence().getStart() < s.getStart())
{
- maxoffset = newSeq.getStart();
+ maxoffset = newSeqStart;
}
sq.add(newSeq);
}
if (flankSize > -1)
{
- maxoffset = flankSize;
+ maxoffset = Math.min(maxoffset, flankSize);
}
- // now add offset to create a new expanded alignment
+
+ /*
+ * now add offset left and right to create an expanded alignment
+ */
for (SequenceI s : sq)
{
SequenceI ds = s;
}
int s_end = s.findPosition(s.getStart() + s.getLength());
// find available flanking residues for sequence
- int ustream_ds = s.getStart() - ds.getStart(), dstream_ds = ds
- .getEnd() - s_end;
+ int ustream_ds = s.getStart() - ds.getStart();
+ int dstream_ds = ds.getEnd() - s_end;
// build new flanked sequence
offset = maxoffset - flankSize;
ustream_ds = flankSize;
}
- if (flankSize < dstream_ds)
+ if (flankSize <= dstream_ds)
{
- dstream_ds = flankSize;
+ dstream_ds = flankSize - 1;
}
}
+ // TODO use Character.toLowerCase to avoid creating String objects?
char[] upstream = new String(ds.getSequence(s.getStart() - 1
- ustream_ds, s.getStart() - 1)).toLowerCase().toCharArray();
- char[] downstream = new String(ds.getSequence(s_end - 1, s_end + 1
+ char[] downstream = new String(ds.getSequence(s_end - 1, s_end
+ dstream_ds)).toLowerCase().toCharArray();
char[] coreseq = s.getSequence();
char[] nseq = new char[offset + upstream.length + downstream.length
+ coreseq.length];
char c = core.getGapCharacter();
- // TODO could lowercase the flanking regions
+
int p = 0;
for (; p < offset; p++)
{
nseq[p] = c;
}
- // s.setSequence(new String(upstream).toLowerCase()+new String(coreseq) +
- // new String(downstream).toLowerCase());
+
System.arraycopy(upstream, 0, nseq, p, upstream.length);
System.arraycopy(coreseq, 0, nseq, p + upstream.length,
coreseq.length);
{
for (AlignmentAnnotation aa : s.getAnnotation())
{
+ aa.adjustForAlignment(); // JAL-1712 fix
newAl.addAnnotation(aa);
}
}
*/
package jalview.datamodel;
-import jalview.analysis.Rna;
-import jalview.analysis.SecStrConsensus.SimpleBP;
-import jalview.analysis.WUSSParseException;
-
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Map.Entry;
+import jalview.analysis.Rna;
+import jalview.analysis.SecStrConsensus.SimpleBP;
+import jalview.analysis.WUSSParseException;
+
/**
* DOCUMENT ME!
*
{
if (properties == null)
{
- return Collections.EMPTY_LIST;
+ return Collections.emptyList();
}
return properties.keySet();
}
+
+ /**
+ * Returns the Annotation for the given sequence position (base 1) if any,
+ * else null
+ *
+ * @param position
+ *          the sequence position (base 1) used as the lookup key in
+ *          sequenceMapping
+ * @return the value held in sequenceMapping for that position, or null if
+ *         sequenceMapping has not been set
+ */
+ public Annotation getAnnotationForPosition(int position)
+ {
+ return sequenceMapping == null ? null : sequenceMapping.get(position);
+
+ }
}
return file;
}
+ public void setType(String t)
+ {
+ this.type = t;
+ }
public void setType(PDBEntry.Type type)
{
this.type = type.toString();
*/
package jalview.datamodel;
-import jalview.analysis.AlignSeq;
-import jalview.util.StringUtils;
-
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import fr.orsay.lri.varna.models.rna.RNA;
+import jalview.analysis.AlignSeq;
+import jalview.util.StringUtils;
+
/**
*
* Implements the SequenceI interface for a char[] based sequence object.
.println("POSSIBLE IMPLEMENTATION ERROR: null sequence name passed to constructor.");
name = "";
}
- // Does sequence have the /start-end signiature?
+ // Does sequence have the /start-end signature?
if (limitrx.search(name))
{
name = limitrx.left();
* @param id
* DOCUMENT ME!
*/
- public void setPDBId(Vector id)
+ @Override
+ public void setPDBId(Vector<PDBEntry> id)
{
pdbIds = id;
}
*
* @return DOCUMENT ME!
*/
- public Vector getPDBId()
+ @Override
+ public Vector<PDBEntry> getPDBId()
{
return pdbIds;
}
{
if (this.annotation == null)
{
- this.annotation = new Vector();
+ this.annotation = new Vector<AlignmentAnnotation>();
}
if (!this.annotation.contains(annotation))
{
public char[] getSequence(int start, int end);
/**
- * create a new sequence object from start to end of this sequence
+ * create a new sequence object with a subsequence of this one but sharing the
+ * same dataset sequence
*
* @param start
- * int index for start position
+ * int index for start position (base 0, inclusive)
* @param end
- * int index for end position
+ * int index for end position (base 0, exclusive)
*
* @return SequenceI
* @note implementations may use getSequence to get the sequence data
*/
package jalview.datamodel;
-import java.util.*;
+import java.util.Vector;
+/**
+ * Data model for an entry returned from a Uniprot query
+ *
+ * @see uniprot_mapping.xml
+ */
public class UniprotEntry
{
UniprotSequence sequence;
- Vector name;
+ Vector<String> name;
- Vector accession;
+ Vector<String> accession;
- Vector feature;
+ Vector<SequenceFeature> feature;
- Vector dbrefs;
+ Vector<PDBEntry> dbrefs;
UniprotProteinName protName;
- public void setAccession(Vector items)
+ public void setAccession(Vector<String> items)
{
accession = items;
}
- public void setFeature(Vector items)
+ public void setFeature(Vector<SequenceFeature> items)
{
feature = items;
}
- public Vector getFeature()
+ public Vector<SequenceFeature> getFeature()
{
return feature;
}
- public Vector getAccession()
+ public Vector<String> getAccession()
{
return accession;
}
return protName;
}
- public void setName(Vector na)
+ public void setName(Vector<String> na)
{
name = na;
}
- public Vector getName()
+ public Vector<String> getName()
{
return name;
}
sequence = seq;
}
- public Vector getDbReference()
+ public Vector<PDBEntry> getDbReference()
{
return dbrefs;
}
- public void setDbReference(Vector dbref)
+ public void setDbReference(Vector<PDBEntry> dbref)
{
this.dbrefs = dbref;
}
*/
package jalview.datamodel;
-import java.util.*;
+import java.util.Vector;
+/**
+ * Data model of the reply to a Uniprot query (a list of Uniprot entries), as
+ * unmarshalled by Castor using a binding file (uniprot_mapping.xml)
+ */
public class UniprotFile
{
- Vector _items;
+ Vector<UniprotEntry> _items;
- public void setUniprotEntries(Vector items)
+ public void setUniprotEntries(Vector<UniprotEntry> items)
{
_items = items;
}
- public Vector getUniprotEntries()
+ public Vector<UniprotEntry> getUniprotEntries()
{
return _items;
}
*/
package jalview.datamodel;
+import java.util.Vector;
+
+/**
+ * Data model for protein name returned from a Uniprot query
+ *
+ * Protein names are read from the Uniprot XML element
+ * uniprot/entry/protein/recommendedName/fullName
+ *
+ * @see uniprot_mapping.xml
+ */
public class UniprotProteinName
{
- /**
- * internal content storage
- */
- private java.util.Vector names;
+ private Vector<String> names;
- public void setName(java.util.Vector names)
+ public void setName(Vector<String> names)
{
this.names = names;
}
- public java.util.Vector getName()
+ public Vector<String> getName()
{
return names;
}
*/
package jalview.datamodel;
+/**
+ * Data model for the sequence returned by a Uniprot query
+ *
+ * @see uniprot_mapping.xml
+ */
public class UniprotSequence
{
+ private String _content = "";
+
/**
- * internal content storage
+ * Sets the content string, omitting any space characters. A null argument is
+ * ignored and leaves the current content unchanged.
+ *
+ * @param seq
*/
- private java.lang.String _content = "";
-
public void setContent(String seq)
{
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < seq.length(); i++)
+ if (seq != null)
{
- if (seq.charAt(i) != ' ')
+ StringBuilder sb = new StringBuilder(seq.length());
+ for (int i = 0; i < seq.length(); i++)
{
- sb.append(seq.charAt(i));
+ if (seq.charAt(i) != ' ')
+ {
+ sb.append(seq.charAt(i));
+ }
}
+ _content = sb.toString();
}
- _content = sb.toString();
}
public String getContent()
*/
package jalview.datamodel.xdb.embl;
+/**
+ * Data model for a feature/location/locationElement/basePosition read from an
+ * EMBL query reply
+ *
+ * @see embl_mapping.xml
+ */
public class BasePosition
{
String type;
*/
package jalview.datamodel.xdb.embl;
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.Vector;
+
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.FeatureProperties;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
-import java.util.Enumeration;
-import java.util.Hashtable;
-import java.util.Iterator;
-import java.util.Vector;
-
+/**
+ * Data model for one entry returned from an EMBL query, as unmarshalled by
+ * Castor using a binding file
+ *
+ * For example: http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml
+ *
+ * @see embl_mapping.xml
+ */
public class EmblEntry
{
String accession;
String lastUpdated;
- Vector keywords;
-
- Vector refs;
+ Vector<String> keywords;
- Vector dbRefs;
+ Vector<DBRefEntry> dbRefs;
- Vector features;
+ Vector<EmblFeature> features;
EmblSequence sequence;
/**
* @return the dbRefs
*/
- public Vector getDbRefs()
+ public Vector<DBRefEntry> getDbRefs()
{
return dbRefs;
}
* @param dbRefs
* the dbRefs to set
*/
- public void setDbRefs(Vector dbRefs)
+ public void setDbRefs(Vector<DBRefEntry> dbRefs)
{
this.dbRefs = dbRefs;
}
/**
* @return the features
*/
- public Vector getFeatures()
+ public Vector<EmblFeature> getFeatures()
{
return features;
}
* @param features
* the features to set
*/
- public void setFeatures(Vector features)
+ public void setFeatures(Vector<EmblFeature> features)
{
this.features = features;
}
/**
* @return the keywords
*/
- public Vector getKeywords()
+ public Vector<String> getKeywords()
{
return keywords;
}
* @param keywords
* the keywords to set
*/
- public void setKeywords(Vector keywords)
+ public void setKeywords(Vector<String> keywords)
{
this.keywords = keywords;
}
}
/**
- * @return the refs
- */
- public Vector getRefs()
- {
- return refs;
- }
-
- /**
- * @param refs
- * the refs to set
- */
- public void setRefs(Vector refs)
- {
- this.refs = refs;
- }
-
- /**
* @return the releaseCreated
*/
public String getRCreated()
* @param releaseCreated
* the releaseCreated to set
*/
- public void setRcreated(String releaseCreated)
+ public void setRCreated(String releaseCreated)
{
this.rCreated = releaseCreated;
}
*/
package jalview.datamodel.xdb.embl;
+/**
+ * Data model mapped from any <error> elements returned from an EMBL query
+ *
+ * @see embl_mapping.xml
+ */
public class EmblError
{
String accession;
import java.util.Vector;
+import jalview.datamodel.DBRefEntry;
+
+/**
+ * Data model for a <feature> element returned from an EMBL query reply
+ *
+ * @see embl_mapping.xml
+ */
public class EmblFeature
{
String name;
- Vector dbRefs;
+ Vector<DBRefEntry> dbRefs;
- Vector qualifiers;
+ Vector<Qualifier> qualifiers;
- Vector locations;
+ Vector<EmblFeatureLocations> locations;
/**
* @return the dbRefs
*/
- public Vector getDbRefs()
+ public Vector<DBRefEntry> getDbRefs()
{
return dbRefs;
}
* @param dbRefs
* the dbRefs to set
*/
- public void setDbRefs(Vector dbRefs)
+ public void setDbRefs(Vector<DBRefEntry> dbRefs)
{
this.dbRefs = dbRefs;
}
/**
* @return the locations
*/
- public Vector getLocations()
+ public Vector<EmblFeatureLocations> getLocations()
{
return locations;
}
* @param locations
* the locations to set
*/
- public void setLocations(Vector locations)
+ public void setLocations(Vector<EmblFeatureLocations> locations)
{
this.locations = locations;
}
/**
* @return the qualifiers
*/
- public Vector getQualifiers()
+ public Vector<Qualifier> getQualifiers()
{
return qualifiers;
}
* @param qualifiers
* the qualifiers to set
*/
- public void setQualifiers(Vector qualifiers)
+ public void setQualifiers(Vector<Qualifier> qualifiers)
{
this.qualifiers = qualifiers;
}
*/
package jalview.datamodel.xdb.embl;
+/**
+ * Data model for a feature/location/locationElement read from an EMBL query
+ * reply
+ *
+ * @see embl_mapping.xml
+ */
public class EmblFeatureLocElement
{
String type;
*/
package jalview.datamodel.xdb.embl;
-import java.util.Enumeration;
import java.util.Vector;
+/**
+ * Data model for a <location> child element of a <feature> read
+ * from an EMBL query reply
+ *
+ * @see embl_mapping.xml
+ */
public class EmblFeatureLocations
{
- Vector locElements;
+ Vector<EmblFeatureLocElement> locElements;
String locationType;
/**
* @return the locElements
*/
- public Vector getLocElements()
+ public Vector<EmblFeatureLocElement> getLocElements()
{
return locElements;
}
* @param locElements
* the locElements to set
*/
- public void setLocElements(Vector locElements)
+ public void setLocElements(Vector<EmblFeatureLocElement> locElements)
{
this.locElements = locElements;
}
{
int sepos = 0;
int[] se = new int[locElements.size() * 2];
- if (locationType.equalsIgnoreCase("single"))
+ if (locationType.equalsIgnoreCase("single")) // TODO: or "simple" ?
{
- for (Enumeration le = locElements.elements(); le.hasMoreElements();)
+ for (EmblFeatureLocElement loce : locElements)
{
- EmblFeatureLocElement loce = (EmblFeatureLocElement) le
- .nextElement();
if (accession == null || loce.accession != null
&& accession.equals(loce.accession))
{
}
else if (locationType.equalsIgnoreCase("join"))
{
- for (Enumeration le = locElements.elements(); le.hasMoreElements();)
+ for (EmblFeatureLocElement loce : locElements)
{
- EmblFeatureLocElement loce = (EmblFeatureLocElement) le
- .nextElement();
if (accession == null || loce.accession != null
&& accession.equals(loce.accession))
{
else if (locationType != null)
{
if (jalview.bin.Cache.log != null)
+ {
jalview.bin.Cache.log
.error("EmbleFeatureLocations.getElementRanges cannot deal with locationType=='"
+ locationType + "'");
+ }
else
+ {
System.err
.println("EmbleFeatureLocations.getElementRanges cannot deal with locationType=='"
+ locationType + "'");
+ }
}
// trim range if necessary.
if (se != null && sepos != se.length)
import java.io.File;
import java.io.FileReader;
+import java.io.PrintWriter;
import java.io.Reader;
import java.util.Vector;
import org.exolab.castor.mapping.Mapping;
import org.exolab.castor.xml.Unmarshaller;
+/**
+ * Data model for entries returned from an EMBL query, as unmarshalled by
+ * Castor using a binding file
+ *
+ * For example: http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml
+ *
+ * @see embl_mapping.xml
+ */
public class EmblFile
{
- Vector entries;
+ Vector<EmblEntry> entries;
- Vector errors;
+ Vector<EmblError> errors;
/**
* @return the entries
*/
- public Vector getEntries()
+ public Vector<EmblEntry> getEntries()
{
return entries;
}
* @param entries
* the entries to set
*/
- public void setEntries(Vector entries)
+ public void setEntries(Vector<EmblEntry> entries)
{
this.entries = entries;
}
/**
* @return the errors
*/
- public Vector getErrors()
+ public Vector<EmblError> getErrors()
{
return errors;
}
* @param errors
* the errors to set
*/
- public void setErrors(Vector errors)
+ public void setErrors(Vector<EmblError> errors)
{
this.errors = errors;
}
public static EmblFile getEmblFile(File file)
{
if (file == null)
+ {
return null;
+ }
try
{
return EmblFile.getEmblFile(new FileReader(file));
{
// 1. Load the mapping information from the file
Mapping map = new Mapping(record.getClass().getClassLoader());
+
java.net.URL url = record.getClass().getResource("/embl_mapping.xml");
map.loadMapping(url);
try
{
// uncomment to DEBUG EMBLFile reading
- if (((String) jalview.bin.Cache.getDefault(
- jalview.bin.Cache.CASTORLOGLEVEL, "debug"))
+ if (jalview.bin.Cache.getDefault(
+ jalview.bin.Cache.CASTORLOGLEVEL, "debug")
.equalsIgnoreCase("DEBUG"))
+ {
unmar.setDebug(jalview.bin.Cache.log.isDebugEnabled());
+ }
} catch (Exception e)
{
}
- ;
unmar.setIgnoreExtraElements(true);
+ unmar.setIgnoreExtraAttributes(true);
unmar.setMapping(map);
-
+ unmar.setLogWriter(new PrintWriter(System.out));
record = (EmblFile) unmar.unmarshal(file);
} catch (Exception e)
{
return record;
}
-
- public static void main(String args[])
- {
- File mf = null;
- if (args.length == 1)
- {
- mf = new File(args[0]);
- }
- if (!mf.exists())
- {
- mf = new File(
- "C:\\Documents and Settings\\JimP\\workspace-3.2\\Jalview Release\\schemas\\embleRecordV1.1.xml");
- }
- EmblFile myfile = EmblFile.getEmblFile(mf);
- if (myfile != null && myfile.entries != null
- && myfile.entries.size() > 0)
- System.out.println(myfile.entries.size() + " Records read. (" + mf
- + ")");
- }
}
*/
package jalview.datamodel.xdb.embl;
+/**
+ * Data model for the sequence extracted from an EMBL query reply
+ *
+ * @see embl_mapping.xml
+ */
public class EmblSequence
{
String version;
*/
package jalview.datamodel.xdb.embl;
+/**
+ * Data model for a <qualifier> child element of a <feature> read
+ * from an EMBL query reply
+ *
+ * @see embl_mapping.xml
+ */
public class Qualifier
{
String name;
public void addEvidence(String qevidence)
{
+ // TODO - not used? can remove?
if (evidence == null)
{
evidence = new String[1];
public void addValues(String value)
{
+ // TODO - not used? can remove?
if (values == null)
{
values = new String[1];
*/
package jalview.ws.dbsources;
+import java.io.File;
+
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceI;
import jalview.datamodel.xdb.embl.EmblEntry;
+import jalview.datamodel.xdb.embl.EmblFile;
import jalview.util.MessageManager;
import jalview.ws.ebi.EBIFetchClient;
-import java.io.File;
-import java.util.Iterator;
-
public abstract class EmblXmlSource extends EbiFileRetrievedProxy
{
/**
* Last properly parsed embl file.
*/
- public jalview.datamodel.xdb.embl.EmblFile efile = null;
+ public EmblFile efile = null;
public EmblXmlSource()
{
file = reply.getAbsolutePath();
if (reply.length() > 25)
{
- efile = jalview.datamodel.xdb.embl.EmblFile.getEmblFile(reply);
+ efile = EmblFile.getEmblFile(reply);
}
else
{
}
if (efile != null)
{
- for (Iterator i = efile.getEntries().iterator(); i.hasNext();)
+ for (EmblEntry entry : efile.getEntries())
{
- EmblEntry entry = (EmblEntry) i.next();
- SequenceI[] seqparts = entry.getSequences(false, true, emprefx); // TODO:
- // use
- // !fetchNa,!fetchPeptide
- // here
- // instead
- // -
- // see
- // todo
- // in
- // emblEntry
+ SequenceI[] seqparts = entry.getSequences(false, true, emprefx);
+ // TODO: use !fetchNa,!fetchPeptide here instead - see todo in EmblEntry
if (seqparts != null)
{
SequenceI[] newseqs = null;
}
for (int j = 0; j < seqparts.length; si++, j++)
{
- newseqs[si] = seqparts[j].deriveSequence(); // place DBReferences on
- // dataset and refer
+ newseqs[si] = seqparts[j].deriveSequence();
+ // place DBReferences on dataset and refer
}
seqs = newseqs;
import java.io.File;
import java.io.FileReader;
-import java.util.Enumeration;
+import java.io.Reader;
import java.util.Vector;
import org.exolab.castor.xml.Unmarshaller;
*/
public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
{
+
+ private static final String BAR_DELIMITER = "|";
+
+ private static final String NEWLINE = "\n";
+
+ private static org.exolab.castor.mapping.Mapping map;
+
+ /**
+ * Constructor
+ */
public Uniprot()
{
super();
return "0"; // we really don't know what version we're on.
}
- private EBIFetchClient ebi = null;
-
- private static org.exolab.castor.mapping.Mapping map;
-
- public Vector getUniprotEntries(File file)
+ /**
+ * Reads a file containing the reply to the EBI Fetch Uniprot data query,
+ * unmarshals it to a UniprotFile object, and returns the list of UniprotEntry
+ * data models (mapped from <entry> elements)
+ *
+ * @param fileReader
+ * @return
+ */
+ public Vector<UniprotEntry> getUniprotEntries(Reader fileReader)
{
UniprotFile uni = new UniprotFile();
try
Unmarshaller unmar = new Unmarshaller(uni);
unmar.setIgnoreExtraElements(true);
unmar.setMapping(map);
- if (file != null)
+ if (fileReader != null)
{
- uni = (UniprotFile) unmar.unmarshal(new FileReader(file));
+ uni = (UniprotFile) unmar.unmarshal(fileReader);
}
} catch (Exception e)
{
queries = queries.toUpperCase().replaceAll(
"(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", "");
Alignment al = null;
- ebi = new EBIFetchClient();
- StringBuffer result = new StringBuffer();
+ EBIFetchClient ebi = new EBIFetchClient();
// uniprotxml parameter required since december 2007
// uniprotkb dbname changed introduced december 2008
File file = ebi.fetchDataAsFile("uniprotkb:" + queries, "uniprotxml",
null);
- Vector entries = getUniprotEntries(file);
+ Vector<UniprotEntry> entries = getUniprotEntries(new FileReader(file));
if (entries != null)
{
+ /*
+ * If Castor binding included sequence@length, we could guesstimate the
+ * size of buffer to hold the alignment
+ */
+ StringBuffer result = new StringBuffer(128);
// First, make the new sequences
- Enumeration en = entries.elements();
- while (en.hasMoreElements())
+ for (UniprotEntry entry : entries)
{
- UniprotEntry entry = (UniprotEntry) en.nextElement();
-
- StringBuffer name = new StringBuffer(">UniProt/Swiss-Prot");
- Enumeration en2 = entry.getAccession().elements();
- while (en2.hasMoreElements())
- {
- name.append("|");
- name.append(en2.nextElement());
- }
- en2 = entry.getName().elements();
- while (en2.hasMoreElements())
- {
- name.append("|");
- name.append(en2.nextElement());
- }
-
- if (entry.getProtein() != null
- && entry.getProtein().getName() != null)
- {
- for (int nm = 0, nmSize = entry.getProtein().getName().size(); nm < nmSize; nm++)
- {
- name.append(" " + entry.getProtein().getName().elementAt(nm));
- }
- }
-
- result.append(name + "\n"
- + entry.getUniprotSequence().getContent() + "\n");
+ StringBuilder name = constructSequenceFastaHeader(entry);
+ result.append(name).append(NEWLINE)
+ .append(entry.getUniprotSequence().getContent())
+ .append(NEWLINE);
}
// Then read in the features and apply them to the dataset
}
/**
+ * Constructs a Fasta-format sequence header for a Uniprot entry. The header
+ * starts with the source (">UniProt/Swiss-Prot"), followed by each accession
+ * id and then each entry name, every one preceded by a '|' delimiter. Any
+ * protein names are appended last, each preceded by a space rather than a
+ * bar.
+ *
+ * @param entry
+ *          the Uniprot entry to describe; its accession and name lists are
+ *          iterated without a null check, so they are assumed to be non-null
+ *          (a null protein, or null protein name list, is tolerated)
+ * @return a new StringBuilder holding the header text (no trailing newline is
+ *         appended here)
+ */
+ public static StringBuilder constructSequenceFastaHeader(
+ UniprotEntry entry)
+ {
+ StringBuilder name = new StringBuilder(32);
+ name.append(">UniProt/Swiss-Prot");
+ for (String accessionId : entry.getAccession())
+ {
+ name.append(BAR_DELIMITER);
+ name.append(accessionId);
+ }
+ for (String n : entry.getName())
+ {
+ name.append(BAR_DELIMITER);
+ name.append(n);
+ }
+
+ if (entry.getProtein() != null
+ && entry.getProtein().getName() != null)
+ {
+ for (String nm : entry.getProtein().getName())
+ {
+ name.append(" ").append(nm);
+ }
+ }
+ return name;
+ }
+
+ /**
* add an ordered set of UniprotEntry objects to an ordered set of sequences.
*
* @param al
* - a sequence of n sequences
* @param entries
- * a seuqence of n uniprot entries to be analysed.
+ * a list of n uniprot entries to be analysed.
*/
- public void addUniprotXrefs(Alignment al, Vector entries)
+ public void addUniprotXrefs(Alignment al, Vector<UniprotEntry> entries)
{
+ final String dbVersion = getDbVersion();
+
for (int i = 0; i < entries.size(); i++)
{
- UniprotEntry entry = (UniprotEntry) entries.elementAt(i);
- Enumeration e = entry.getDbReference().elements();
- Vector<PDBEntry> onlyPdbEntries = new Vector();
- Vector dbxrefs = new Vector();
- while (e.hasMoreElements())
+ UniprotEntry entry = entries.elementAt(i);
+ Vector<PDBEntry> onlyPdbEntries = new Vector<PDBEntry>();
+ Vector<DBRefEntry> dbxrefs = new Vector<DBRefEntry>();
+
+ for (PDBEntry pdb : entry.getDbReference())
{
- PDBEntry pdb = (PDBEntry) e.nextElement();
DBRefEntry dbr = new DBRefEntry();
dbr.setSource(pdb.getType());
dbr.setAccessionId(pdb.getId());
- dbr.setVersion(DBRefSource.UNIPROT + ":" + getDbVersion());
+ dbr.setVersion(DBRefSource.UNIPROT + ":" + dbVersion);
dbxrefs.addElement(dbr);
- if (!pdb.getType().equals("PDB"))
+ if ("PDB".equals(pdb.getType()))
{
- continue;
+ onlyPdbEntries.addElement(pdb);
}
-
- onlyPdbEntries.addElement(pdb);
}
+
SequenceI sq = al.getSequenceAt(i);
while (sq.getDatasetSequence() != null)
{
sq = sq.getDatasetSequence();
}
- Enumeration en2 = entry.getAccession().elements();
- while (en2.hasMoreElements())
+ for (String accessionId : entry.getAccession())
{
- // we always add as uniprot if we retrieved from uniprot or uniprot name
- sq.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, getDbVersion(), en2
- .nextElement().toString()));
+ /*
+ * add as uniprot whether retrieved from uniprot or uniprot_name
+ */
+ sq.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
+ accessionId));
}
- en2 = dbxrefs.elements();
- while (en2.hasMoreElements())
- {
- // we always add as uniprot if we retrieved from uniprot or uniprot name
- sq.addDBRef((DBRefEntry) en2.nextElement());
+ for (DBRefEntry dbRef : dbxrefs)
+ {
+ sq.addDBRef(dbRef);
}
sq.setPDBId(onlyPdbEntries);
if (entry.getFeature() != null)
{
- e = entry.getFeature().elements();
- while (e.hasMoreElements())
+ for (SequenceFeature sf : entry.getFeature())
{
- SequenceFeature sf = (SequenceFeature) e.nextElement();
sf.setFeatureGroup("Uniprot");
sq.addSequenceFeature(sf);
}
*/
package jalview.ws.ebi;
-import jalview.util.MessageManager;
-
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
+import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
+import java.util.List;
import java.util.StringTokenizer;
+import jalview.util.MessageManager;
+
/**
* DOCUMENT ME!
*
throw new Error(MessageManager.getString("error.not_yet_implemented"));
}
+ /**
+ * Send an HTTP fetch request to EBI and save the reply in a temporary file.
+ *
+ * @param ids
+ * the query formatted as db:query1;query2;query3
+ * @param f
+ * the format wanted
+ * @param s
+ * - unused parameter
+ * @return the file holding the response
+ * @throws OutOfMemoryError
+ */
public File fetchDataAsFile(String ids, String f, String s)
throws OutOfMemoryError
{
* @param f
* raw/xml
* @param s
- * ?
+ * not used - remove?
*
* @return Raw string array result of query set
*/
+ db.toLowerCase() + "/" + ids.toLowerCase()
+ (f != null ? "/" + f : ""));
- BufferedInputStream is = new BufferedInputStream(rcall.openStream());
+ InputStream is = new BufferedInputStream(rcall.openStream());
if (outFile != null)
{
FileOutputStream fio = new FileOutputStream(outFile);
{
BufferedReader br = new BufferedReader(new InputStreamReader(is));
String rtn;
- ArrayList<String> arl = new ArrayList<String>();
+ List<String> arl = new ArrayList<String>();
while ((rtn = br.readLine()) != null)
{
arl.add(rtn);
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
"GGGTCAGGCAGT\n";
// @formatter:on
- public static Sequence ts=new Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD");
+ // public static Sequence ts=new
+ // Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD");
+ public static Sequence ts = new Sequence("short",
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
@Test
- public void testExpandFlanks()
+ public void testExpandContext()
{
AlignmentI al = new Alignment(new Sequence[] {});
- for (int i=4;i<14;i+=3)
+ for (int i = 4; i < 14; i += 2)
{
SequenceI s1=ts.deriveSequence().getSubSequence(i, i+7);
al.addSequence(s1);
System.out.println(new AppletFormatAdapter().formatSequences("Clustal", al, true));
for (int flnk=-1;flnk<25; flnk++)
{
- AlignmentI exp;
- System.out.println("\nFlank size: "+flnk);
- System.out.println(new AppletFormatAdapter().formatSequences("Clustal", exp=AlignmentUtils.expandContext(al, flnk), true));
- if (flnk==-1) {
- for (SequenceI sq:exp.getSequences())
+ AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
+ System.out.println("\nFlank size: " + flnk);
+ System.out.println(new AppletFormatAdapter().formatSequences(
+ "Clustal", exp, true));
+ if (flnk == -1)
{
+ /*
+ * Full expansion to complete sequences
+ */
+ for (SequenceI sq : exp.getSequences())
+ {
String ung = sq.getSequenceAsString().replaceAll("-+", "");
- assertTrue("Flanking sequence not the same as original dataset sequence.\n"+ung+"\n"+sq.getDatasetSequence().getSequenceAsString(),ung.equalsIgnoreCase(sq.getDatasetSequence().getSequenceAsString()));
+ final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n"
+ + ung
+ + "\n"
+ + sq.getDatasetSequence().getSequenceAsString();
+ assertTrue(errorMsg, ung.equalsIgnoreCase(sq.getDatasetSequence()
+ .getSequenceAsString()));
+ }
}
+ else if (flnk == 24)
+ {
+ /*
+ * Last sequence is fully expanded, others have leading gaps to match
+ */
+ assertTrue(exp.getSequenceAt(4).getSequenceAsString()
+ .startsWith("abc"));
+ assertTrue(exp.getSequenceAt(3).getSequenceAsString()
+ .startsWith("--abc"));
+ assertTrue(exp.getSequenceAt(2).getSequenceAsString()
+ .startsWith("----abc"));
+ assertTrue(exp.getSequenceAt(1).getSequenceAsString()
+ .startsWith("------abc"));
+ assertTrue(exp.getSequenceAt(0).getSequenceAsString()
+ .startsWith("--------abc"));
}
}
- }
+ }
+
+ /**
+ * Test that annotations are correctly adjusted by expandContext: a
+ * subsequence (DEF of ABCDEFGHI) carrying one annotation row is fully
+ * expanded, and both the annotations array and the annotation to sequence
+ * position mappings are checked before and after the expansion
+ */
+ @Test
+ public void testExpandContext_annotation()
+ {
+ AlignmentI al = new Alignment(new Sequence[]
+ {});
+ SequenceI ds = new Sequence("Seq1", "ABCDEFGHI");
+ // subsequence DEF:
+ SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6);
+ al.addSequence(seq1);
+
+ /*
+ * Annotate DEF with 4/5/6 respectively (each value equals the base 1
+ * position of the annotated residue in ABCDEFGHI)
+ */
+ Annotation[] anns = new Annotation[]
+ { new Annotation(4), new Annotation(5), new Annotation(6) };
+ AlignmentAnnotation ann = new AlignmentAnnotation("SS",
+ "secondary structure", anns);
+ seq1.addAlignmentAnnotation(ann);
+
+ /*
+ * The annotations array should match aligned positions (one entry for
+ * each of D, E, F)
+ */
+ assertEquals(3, ann.annotations.length);
+ assertEquals(4, ann.annotations[0].value, 0.001);
+ assertEquals(5, ann.annotations[1].value, 0.001);
+ assertEquals(6, ann.annotations[2].value, 0.001);
+
+ /*
+ * Check annotation to sequence position mappings before expanding the
+ * sequence; these are set up in Sequence.addAlignmentAnnotation ->
+ * Annotation.setSequenceRef -> createSequenceMappings
+ */
+ assertNull(ann.getAnnotationForPosition(1));
+ assertNull(ann.getAnnotationForPosition(2));
+ assertNull(ann.getAnnotationForPosition(3));
+ assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
+ assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
+ assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
+ assertNull(ann.getAnnotationForPosition(7));
+ assertNull(ann.getAnnotationForPosition(8));
+ assertNull(ann.getAnnotationForPosition(9));
+
+ /*
+ * Expand the subsequence to the full sequence abcDEFghi (a flank size of
+ * -1 is used here; the added flanking residues come back in lower case)
+ */
+ AlignmentI expanded = AlignmentUtils.expandContext(al, -1);
+ // FIXME expandContext adds an unnecessary gap; need tests to cover all
+ // cases for which 'maxOffset' is computed
+ assertEquals("-abcDEFghi", expanded.getSequenceAt(0)
+ .getSequenceAsString());
+
+ /*
+ * Confirm the alignment and sequence have the same SS annotation,
+ * referencing the expanded sequence
+ */
+ ann = expanded.getSequenceAt(0).getAnnotation()[0];
+ assertSame(ann, expanded.getAlignmentAnnotation()[0]);
+ assertSame(expanded.getSequenceAt(0), ann.sequenceRef);
+
+ /*
+ * The annotations array should have null values except for annotated
+ * positions (array indices 4-6, i.e. DEF within -abcDEFghi)
+ */
+ assertNull(ann.annotations[0]);
+ assertNull(ann.annotations[1]);
+ assertNull(ann.annotations[2]);
+ assertNull(ann.annotations[3]);
+ assertEquals(4, ann.annotations[4].value, 0.001);
+ assertEquals(5, ann.annotations[5].value, 0.001);
+ assertEquals(6, ann.annotations[6].value, 0.001);
+ assertNull(ann.annotations[7]);
+ assertNull(ann.annotations[8]);
+ assertNull(ann.annotations[9]);
+
+ /*
+ * sequence position mappings should be unchanged (same assertions as
+ * before the expansion)
+ */
+ assertNull(ann.getAnnotationForPosition(1));
+ assertNull(ann.getAnnotationForPosition(2));
+ assertNull(ann.getAnnotationForPosition(3));
+ assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
+ assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
+ assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
+ assertNull(ann.getAnnotationForPosition(7));
+ assertNull(ann.getAnnotationForPosition(8));
+ assertNull(ann.getAnnotationForPosition(9));
+ }
/**
* Test method that returns a map of lists of sequences by sequence name.
package jalview.datamodel;
import static org.junit.Assert.assertEquals;
-import jalview.analysis.AlignSeq;
-import jalview.io.AppletFormatAdapter;
+import static org.junit.Assert.assertNull;
import org.junit.Test;
+import jalview.analysis.AlignSeq;
+import jalview.io.AppletFormatAdapter;
+
public class AlignmentAnnotationTests
{
@Test
: "Out of range");
assertEquals("Position " + p + " " + alm1 + " " + alm2, alm1, alm2);
}
- // new jalview.io.FormatAdapter().formatSequences("STOCKHOLM", n)
}
+ @Test
+ public void testAdjustForAlignment()
+ {
+ SequenceI seq = new Sequence("TestSeq", "ABCDEFG");
+ seq.createDatasetSequence();
+
+ /*
+ * Annotate positions 3/4/5 (CDE) with values 1/2/3; the two leading nulls
+ * leave A and B without annotation
+ */
+ Annotation[] anns = new Annotation[]
+ { null, null, new Annotation(1), new Annotation(2), new Annotation(3) };
+ AlignmentAnnotation ann = new AlignmentAnnotation("SS",
+ "secondary structure", anns);
+ seq.addAlignmentAnnotation(ann);
+
+ /*
+ * Check annotation map before modifying aligned sequence (positions are
+ * base 1 residue positions in ABCDEFG)
+ */
+ assertNull(ann.getAnnotationForPosition(1));
+ assertNull(ann.getAnnotationForPosition(2));
+ assertNull(ann.getAnnotationForPosition(6));
+ assertNull(ann.getAnnotationForPosition(7));
+ assertEquals(1, ann.getAnnotationForPosition(3).value, 0.001d);
+ assertEquals(2, ann.getAnnotationForPosition(4).value, 0.001d);
+ assertEquals(3, ann.getAnnotationForPosition(5).value, 0.001d);
+
+ /*
+ * Trim the displayed sequence to BCD and adjust annotations
+ */
+ seq.setSequence("BCD");
+ seq.setStart(2);
+ seq.setEnd(4);
+ ann.adjustForAlignment();
+
+ /*
+ * Should now have annotations for aligned positions 2, 3 (CD) only; the
+ * first entry (B) is null and the annotation for E is dropped
+ */
+ assertEquals(3, ann.annotations.length);
+ assertNull(ann.annotations[0]);
+ assertEquals(1, ann.annotations[1].value, 0.001);
+ assertEquals(2, ann.annotations[2].value, 0.001);
+ }
}
seq.getDatasetSequence().setDatasetSequence(seq); // loop!
assertNull(seq.getSequenceFeatures());
}
+
+ /**
+ * Test the method that returns an array, indexed by aligned column (base 0),
+ * whose entries are the residue positions (base 1) at that column (or of the
+ * next residue to the right if the column is a gap)
+ */
+ @Test
+ public void testFindPositionMap()
+ {
+ /*
+ * Note: Javadoc for findPosition says it returns the residue position to
+ * the left of a gapped position; in fact it returns the position to the
+ * right. Also it returns a non-existent residue position for a gap beyond
+ * the sequence.
+ *
+ * Hence the expected map below: the gaps at columns 2, 4 and 6 take the
+ * position of the following residue (3, 4, 5) and the trailing gap takes
+ * 6, one more than the last residue position (E = 5).
+ */
+ Sequence seq = new Sequence("TestSeq", "AB.C-D E.");
+ int[] map = seq.findPositionMap();
+ assertEquals(Arrays.toString(new int[]
+ { 1, 2, 3, 3, 4, 4, 5, 5, 6 }), Arrays.toString(map));
+ }
+
+ /**
+ * Test for getSubSequence: the result holds the selected residues, has
+ * start/end set to their base 1 positions in the parent sequence, and
+ * shares the parent's dataset sequence
+ */
+ @Test
+ public void testGetSubsequence()
+ {
+ SequenceI seq = new Sequence("TestSeq", "ABCDEFG");
+ seq.createDatasetSequence();
+
+ // positions are base 0, end position is exclusive
+ SequenceI subseq = seq.getSubSequence(2, 4);
+
+ assertEquals("CD", subseq.getSequenceAsString());
+ // start/end are base 1 positions
+ assertEquals(3, subseq.getStart());
+ assertEquals(4, subseq.getEnd());
+ // subsequence shares the full dataset sequence
+ assertSame(seq.getDatasetSequence(), subseq.getDatasetSequence());
+ }
+
+ /**
+ * Test for deriveSequence applied to a sequence with a dataset: the derived
+ * sequence has the same residues and refers to the same dataset sequence
+ * object
+ */
+ @Test
+ public void testDeriveSequence_existingDataset()
+ {
+ SequenceI seq = new Sequence("Seq1", "CD");
+ seq.setDatasetSequence(new Sequence("Seq1", "ABCDEF"));
+ seq.setStart(3);
+ seq.setEnd(4);
+ SequenceI derived = seq.deriveSequence();
+ assertEquals("CD", derived.getSequenceAsString());
+ assertSame(seq.getDatasetSequence(), derived.getDatasetSequence());
+ }
+
+ /**
+ * Test for deriveSequence applied to an ungapped sequence with no dataset:
+ * the derived sequence and its dataset sequence both hold the original
+ * residues
+ */
+ @Test
+ public void testDeriveSequence_noDatasetUngapped()
+ {
+ SequenceI seq = new Sequence("Seq1", "ABCDEF");
+ assertEquals(1, seq.getStart());
+ assertEquals(6, seq.getEnd());
+ SequenceI derived = seq.deriveSequence();
+ assertEquals("ABCDEF", derived.getSequenceAsString());
+ assertEquals("ABCDEF", derived.getDatasetSequence()
+ .getSequenceAsString());
+ }
+
+ /**
+ * Test for deriveSequence applied to a gapped sequence with no dataset: the
+ * derived sequence keeps the gap characters while its dataset sequence holds
+ * the ungapped residues only; start/end (1..6) count residues, not columns
+ */
+ @Test
+ public void testDeriveSequence_noDatasetGapped()
+ {
+ SequenceI seq = new Sequence("Seq1", "AB-C.D EF");
+ assertEquals(1, seq.getStart());
+ assertEquals(6, seq.getEnd());
+ assertNull(seq.getDatasetSequence());
+ SequenceI derived = seq.deriveSequence();
+ assertEquals("AB-C.D EF", derived.getSequenceAsString());
+ assertEquals("ABCDEF", derived.getDatasetSequence()
+ .getSequenceAsString());
+ }
}
--- /dev/null
+package jalview.datamodel.xdb.embl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.StringReader;
+import java.util.Vector;
+
+import org.junit.Test;
+
+import jalview.datamodel.DBRefEntry;
+
+public class EmblFileTest
+{
+ // test XML (a single entry) adapted from http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/embl/x53828/emblxml
+ private static final String TESTDATA =
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
+ + "<EMBL_Services>"
+ + "<entry accession=\"X53828\" version=\"3\" lastUpdated=\"2005-04-18\" releaseCreated=\"25\" releaseLastUpdated=\"83\">"
+ + "<description>Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)</description>"
+ + "<keyword>L-lactate dehydrogenase</keyword><keyword>chutney</keyword>"
+ + "<dbreference db=\"EuropePMC\" primary=\"PMC1460223\" secondary=\"9649548\" />"
+ + "<feature name=\"CDS\"><dbreference db=\"GOA\" primary=\"P00340\" secondary=\"2.1\" /><dbreference db=\"InterPro\" primary=\"IPR001236\" />"
+ + "<qualifier name=\"note\"><value>L-lactate dehydrogenase A-chain</value><value>pickle</value></qualifier>"
+ + "<qualifier name=\"translation\"><value>MSLKDHLIHN</value><evidence>Keith</evidence></qualifier>"
+ + "<location type=\"single\" complement=\"true\">"
+ + "<locationElement type=\"range\" accession=\"X53828\" version=\"1\" complement=\"false\">"
+ + "<basePosition type=\"simple\">60</basePosition><basePosition type=\"join\">1058</basePosition>"
+ + "</locationElement></location></feature>"
+ + "<sequence type=\"mRNA\" version=\"2\">GTGACG</sequence></entry></EMBL_Services>";
+
+ @Test
+ public void testGetEmblFile()
+ {
+ Vector<EmblEntry> entries = EmblFile.getEmblFile(
+ new StringReader(TESTDATA)).getEntries();
+ assertEquals(1, entries.size());
+ EmblEntry entry = entries.get(0);
+
+ assertEquals("X53828", entry.getAccession());
+ assertEquals(
+ "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)",
+ entry.getDesc());
+ assertEquals("2005-04-18", entry.getLastUpdated());
+
+ /*
+ * FIXME these assertions fail - values are null - why?? Adding or removing
+ * attributes in the test XML modifies behaviour. eg. inserting an attribute
+ * _before_ lastUpdated results in a null value in this field.
+ * (The test XML does supply releaseCreated="25" and
+ * releaseLastUpdated="83" on the entry element.)
+ */
+ // assertEquals("25", entry.getRCreated());
+ // assertEquals("83", entry.getRLastUpdated());
+
+ assertEquals(2, entry.getKeywords().size());
+ assertEquals("L-lactate dehydrogenase", entry.getKeywords().get(0));
+ assertEquals("chutney", entry.getKeywords().get(1));
+
+ /*
+ * dbrefs held directly on the entry (the two dbreference elements nested
+ * in the feature are checked with the feature below)
+ */
+ assertEquals(1, entry.getDbRefs().size());
+ DBRefEntry dbref = entry.getDbRefs().get(0);
+ assertEquals("EuropePMC", dbref.getSource());
+ assertEquals("PMC1460223", dbref.getAccessionId());
+ assertEquals("9649548", dbref.getVersion());
+
+ /*
+ * sequence features (a single CDS feature in the test data); a dbreference
+ * with no 'secondary' attribute is expected to give an empty version
+ */
+ assertEquals(1, entry.getFeatures().size());
+ EmblFeature ef = entry.getFeatures().get(0);
+ assertEquals("CDS", ef.getName());
+ assertEquals(2, ef.getDbRefs().size());
+ dbref = ef.getDbRefs().get(0);
+ assertEquals("GOA", dbref.getSource());
+ assertEquals("P00340", dbref.getAccessionId());
+ assertEquals("2.1", dbref.getVersion());
+ dbref = ef.getDbRefs().get(1);
+ assertEquals("InterPro", dbref.getSource());
+ assertEquals("IPR001236", dbref.getAccessionId());
+ assertEquals("", dbref.getVersion());
+ assertEquals(2, ef.getQualifiers().size());
+
+ // feature qualifiers: 'note' has two values and no evidence, 'translation' has one of each
+ Qualifier q = ef.getQualifiers().get(0);
+ assertEquals("note", q.getName());
+ assertEquals(2, q.getValues().length);
+ assertEquals("L-lactate dehydrogenase A-chain", q.getValues()[0]);
+ assertEquals("pickle", q.getValues()[1]);
+ assertNull(q.getEvidence());
+ q = ef.getQualifiers().get(1);
+ assertEquals("translation", q.getName());
+ assertEquals(1, q.getValues().length);
+ assertEquals("MSLKDHLIHN", q.getValues()[0]);
+ assertEquals(1, q.getEvidence().length);
+ assertEquals("Keith", q.getEvidence()[0]);
+
+ // feature locations: one location holding one location element with two base positions
+ assertEquals(1, ef.getLocations().size());
+ EmblFeatureLocations fl = ef.getLocations().get(0);
+ assertEquals("single", fl.getLocationType());
+ assertTrue(fl.isLocationComplement());
+ assertEquals(1, fl.getLocElements().size());
+ EmblFeatureLocElement le = fl.getLocElements().get(0);
+ assertEquals("range", le.getType());
+ assertEquals("X53828", le.getAccession());
+ assertEquals("1", le.getVersion());
+ assertFalse(le.isComplement());
+ assertEquals(2, le.getBasePositions().length);
+ BasePosition bp = le.getBasePositions()[0];
+ assertEquals("simple", bp.getType());
+ assertEquals("60", bp.getPos());
+ bp = le.getBasePositions()[1];
+ assertEquals("join", bp.getType());
+ assertEquals("1058", bp.getPos());
+
+ /*
+ * Sequence: type and version attributes plus the residues held as the
+ * element's text
+ */
+ EmblSequence seq = entry.getSequence();
+ assertEquals("mRNA", seq.getType());
+ assertEquals("2", seq.getVersion());
+ assertEquals("GTGACG", seq.getSequence());
+ }
+}
--- /dev/null
+package jalview.ws.dbsources;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Vector;
+
+import org.junit.Test;
+
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.UniprotEntry;
+
+public class UniprotTest
+{
+ // test XML (a single entry) adapted from http://www.uniprot.org/uniprot/A9CKP4.xml
+ private static final String UNIPROT_XML = "<?xml version='1.0' encoding='UTF-8'?>"
+ + "<uniprot>"
+ + "<entry dataset=\"TrEMBL\" created=\"2008-01-15\" modified=\"2015-03-04\" version=\"38\">"
+ + "<accession>A9CKP4</accession>"
+ + "<accession>A9CKP5</accession>"
+ + "<name>A9CKP4_AGRT5</name>"
+ + "<name>A9CKP4_AGRT6</name>"
+ + "<protein><recommendedName><fullName>Mitogen-activated protein kinase 13</fullName><fullName>Henry</fullName></recommendedName></protein>"
+ + "<dbReference type=\"PDB\" id=\"2FSQ\"><property type=\"method\" value=\"X-ray\"/><property type=\"resolution\" value=\"1.40\"/></dbReference>"
+ + "<dbReference type=\"PDBsum\" id=\"2FSR\"/>"
+ + "<feature type=\"signal peptide\" evidence=\"7\"><location><begin position=\"1\"/><end position=\"18\"/></location></feature>"
+ + "<feature type=\"propeptide\" description=\"Activation peptide\" id=\"PRO_0000027399\" evidence=\"9 16 17 18\"><location><begin position=\"19\"/><end position=\"20\"/></location></feature>"
+ + "<feature type=\"chain\" description=\"Granzyme B\" id=\"PRO_0000027400\"><location><begin position=\"21\"/><end position=\"247\"/></location></feature>"
+ + "<sequence length=\"10\" mass=\"27410\" checksum=\"8CB760AACF88FE6C\" modified=\"2008-01-15\" version=\"1\">MHAPL VSKDL</sequence></entry>"
+ + "</uniprot>";
+
+ /**
+ * Test the method that unmarshals XML to a Uniprot model: checks the names,
+ * accessions, sequence, protein names, features and cross-references read
+ * from the single entry in the test XML
+ */
+ @Test
+ public void testGetUniprotEntries()
+ {
+ Uniprot u = new Uniprot();
+ Reader reader = new StringReader(UNIPROT_XML);
+ Vector<UniprotEntry> entries = u.getUniprotEntries(reader);
+ assertEquals(1, entries.size());
+ UniprotEntry entry = entries.get(0);
+ assertEquals(2, entry.getName().size());
+ assertEquals("A9CKP4_AGRT5", entry.getName().get(0));
+ assertEquals("A9CKP4_AGRT6", entry.getName().get(1));
+ assertEquals(2, entry.getAccession().size());
+ assertEquals("A9CKP4", entry.getAccession().get(0));
+ assertEquals("A9CKP5", entry.getAccession().get(1));
+
+ /*
+ * UniprotSequence drops any space characters (the test XML has
+ * "MHAPL VSKDL")
+ */
+ assertEquals("MHAPLVSKDL", entry.getUniprotSequence()
+ .getContent());
+
+ assertEquals(2, entry.getProtein().getName().size());
+ assertEquals("Mitogen-activated protein kinase 13", entry.getProtein()
+ .getName().get(0));
+ assertEquals("Henry", entry.getProtein().getName().get(1));
+
+ /*
+ * Check sequence features (three in the test XML, each located by a
+ * begin and an end position; the first has no description)
+ */
+ Vector<SequenceFeature> features = entry.getFeature();
+ assertEquals(3, features.size());
+ SequenceFeature sf = features.get(0);
+ assertEquals("signal peptide", sf.getType());
+ assertNull(sf.getDescription());
+ assertNull(sf.getStatus());
+ assertEquals(1, sf.getPosition()); // wrong - Castor bug??
+ assertEquals(1, sf.getBegin());
+ assertEquals(18, sf.getEnd());
+ sf = features.get(1);
+ assertEquals("propeptide", sf.getType());
+ assertEquals("Activation peptide", sf.getDescription());
+ assertEquals(19, sf.getPosition()); // wrong - Castor bug??
+ assertEquals(19, sf.getBegin());
+ assertEquals(20, sf.getEnd());
+ sf = features.get(2);
+ assertEquals("chain", sf.getType());
+ assertEquals("Granzyme B", sf.getDescription());
+ assertEquals(21, sf.getPosition()); // wrong - Castor bug??
+ assertEquals(21, sf.getBegin());
+ assertEquals(247, sf.getEnd());
+
+ /*
+ * Check cross-references (the dbReference elements, returned as PDBEntry
+ * objects; one without property children has a null property table)
+ */
+ Vector<PDBEntry> xrefs = entry.getDbReference();
+ assertEquals(2, xrefs.size());
+
+ PDBEntry xref = xrefs.get(0);
+ assertEquals("2FSQ", xref.getId());
+ assertEquals("PDB", xref.getType());
+ assertEquals(2, xref.getProperty().size());
+ assertEquals("X-ray", xref.getProperty().get("method"));
+ assertEquals("1.40", xref.getProperty().get("resolution"));
+
+ xref = xrefs.get(1);
+ assertEquals("2FSR", xref.getId());
+ assertEquals("PDBsum", xref.getType());
+ assertNull(xref.getProperty());
+ }
+
+ /**
+ * Test the method that formats the sequence name in Fasta style, using the
+ * entry unmarshalled from the test XML
+ */
+ @Test
+ public void testConstructSequenceFastaHeader()
+ {
+ Uniprot u = new Uniprot();
+ Reader reader = new StringReader(UNIPROT_XML);
+ Vector<UniprotEntry> entries = u.getUniprotEntries(reader);
+ UniprotEntry entry = entries.get(0);
+
+ // source + accession ids + names + protein names
+ String expectedName = ">UniProt/Swiss-Prot|A9CKP4|A9CKP5|A9CKP4_AGRT5|A9CKP4_AGRT6 Mitogen-activated protein kinase 13 Henry";
+ assertEquals(expectedName, Uniprot.constructSequenceFastaHeader(entry)
+ .toString());
+ }
+}