*/
package jalview.ws.dbsources;
+import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.PDBEntry;
+import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.datamodel.UniprotEntry;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
+import java.util.ArrayList;
import java.util.Vector;
import org.exolab.castor.xml.Unmarshaller;
private static final String BAR_DELIMITER = "|"; // placed before each accession id and entry name when the sequence id is built
- private static final String NEWLINE = "\n";
-
private static org.exolab.castor.mapping.Mapping map;
/**
super();
addDbSourceProperty(DBRefSource.SEQDB, DBRefSource.SEQDB);
addDbSourceProperty(DBRefSource.PROTSEQDB);
- // addDbSourceProperty(DBRefSource.MULTIACC, new Integer(50));
}
/*
+ * Always returns null. Presumably this tells callers that several accessions
+ * cannot be joined with a separator in a single query (the previous revision
+ * carried a commented-out ";") - TODO confirm against DbSourceProxy.
*
* @see jalview.ws.DbSourceProxy#getAccessionSeparator()
*/
+ @Override
public String getAccessionSeparator()
{
- return null; // ";";
+ return null;
}
/*
*
* @see jalview.ws.DbSourceProxy#getAccessionValidator()
*/
+ @Override
public Regex getAccessionValidator()
{
return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
*
* @see jalview.ws.DbSourceProxy#getDbSource()
*/
+ @Override
public String getDbSource()
{
return DBRefSource.UNIPROT;
*
* @see jalview.ws.DbSourceProxy#getDbVersion()
*/
+ @Override
public String getDbVersion()
{
return "0"; // we really don't know what version we're on.
*
* @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
*/
+ @Override
public AlignmentI getSequenceRecords(String queries) throws Exception
{
startQuery();
if (entries != null)
{
- /*
- * If Castor binding included sequence@length, we could guesstimate the
- * size of buffer to hold the alignment
- */
- StringBuffer result = new StringBuffer(128);
- // First, make the new sequences
+ ArrayList<SequenceI> seqs = new ArrayList<SequenceI>(); // NOTE(review): entries and al are declared outside the visible lines
for (UniprotEntry entry : entries)
{
- StringBuilder name = constructSequenceFastaHeader(entry);
-
- result.append(name).append(NEWLINE)
- .append(entry.getUniprotSequence().getContent())
- .append(NEWLINE);
+ seqs.add(uniprotEntryToSequenceI(entry)); // each entry is converted directly; no FASTA text is built and re-parsed
}
+ al = new Alignment(seqs.toArray(new SequenceI[0])); // sequences keep the iteration order of entries
- // Then read in the features and apply them to the dataset
- al = parseResult(result.toString());
- if (al != null)
- {
- // Decorate the alignment with database entries.
- addUniprotXrefs(al, entries);
- }
- else
- {
- results = result;
- }
}
stopQuery();
return al;
}
/**
- * Construct a Fasta-format sequence header by concatenating the source,
- * accession id(s) and name(s), delimited by '|', plus any protein names, now
- * with space rather than bar delimiter
+ * Converts one UniProt entry into a sequence object.
+ * <ul>
+ * <li>The sequence name comes from getUniprotEntryId(entry), the residues
+ * from entry.getUniprotSequence().getContent() and the description from
+ * getUniprotEntryDescription(entry).</li>
+ * <li>One DBRefEntry with source DBRefSource.UNIPROT and the version from
+ * getDbVersion() is created per accession id; the first of these becomes the
+ * source DB ref (null when the entry lists no accession).</li>
+ * <li>Each item of entry.getDbReference() becomes a DBRefEntry whose source
+ * is the item's type, whose accession is the item's id and whose version is
+ * DBRefSource.UNIPROT + ":" + db version. Items whose type equals "PDB" are
+ * additionally collected and handed to setPDBId.</li>
+ * <li>When entry.getFeature() is not null, every feature is assigned the
+ * feature group "Uniprot" and added to the sequence.</li>
+ * <li>Finally all collected DBRefEntry objects are set on the sequence.</li>
+ * </ul>
+ * NOTE(review): entry.getAccession(), entry.getDbReference() and
+ * entry.getUniprotSequence() are used without a null check - confirm they
+ * can never be null for unmarshalled entries.
*
* @param entry
- * @return
+ * UniprotEntry
+ * @return SequenceI instance created from the UniprotEntry instance
*/
- public static StringBuilder constructSequenceFastaHeader(
- UniprotEntry entry)
- {
- StringBuilder name = new StringBuilder(32);
- name.append(">UniProt/Swiss-Prot");
+ public SequenceI uniprotEntryToSequenceI(UniprotEntry entry){
+ String id = getUniprotEntryId(entry);
+ SequenceI sequence = new Sequence(id, entry.getUniprotSequence()
+ .getContent());
+ sequence.setDescription(getUniprotEntryDescription(entry));
+
+ final String dbVersion = getDbVersion();
+ ArrayList<DBRefEntry> dbRefs = new ArrayList<DBRefEntry>();
for (String accessionId : entry.getAccession())
{
- name.append(BAR_DELIMITER);
- name.append(accessionId);
+ DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
+ accessionId);
+ dbRefs.add(dbRef);
}
- for (String n : entry.getName())
+ sequence.setSourceDBRef((dbRefs != null && dbRefs.size() > 0) ? dbRefs
+ .get(0) : null); // first accession ref, or null when there is none; NOTE(review): dbRefs is never null here
+
+ Vector<PDBEntry> onlyPdbEntries = new Vector<PDBEntry>();
+ for (PDBEntry pdb : entry.getDbReference())
{
- name.append(BAR_DELIMITER);
- name.append(n);
+ DBRefEntry dbr = new DBRefEntry();
+ dbr.setSource(pdb.getType());
+ dbr.setAccessionId(pdb.getId());
+ dbr.setVersion(DBRefSource.UNIPROT + ":" + dbVersion);
+ dbRefs.add(dbr);
+ if ("PDB".equals(pdb.getType()))
+ {
+ onlyPdbEntries.addElement(pdb);
+ }
}
- if (entry.getProtein() != null && entry.getProtein().getName() != null)
+ sequence.setPDBId(onlyPdbEntries);
+ if (entry.getFeature() != null)
{
- for (String nm : entry.getProtein().getName())
+ for (SequenceFeature sf : entry.getFeature())
{
- name.append(" ").append(nm);
+ sf.setFeatureGroup("Uniprot"); // modifies the feature object held by the entry itself
+ sequence.addSequenceFeature(sf);
}
}
- return name;
+ sequence.setDBRefs(dbRefs.toArray(new DBRefEntry[0])); // accession refs first, then the cross-references
+ return sequence;
}
/**
- * add an ordered set of UniprotEntry objects to an ordered set of seuqences.
+ * Builds the sequence description from the protein names of a UniProt entry.
+ * Every name is appended followed by one space, so a non-empty result ends
+ * with a trailing space. An empty string is returned when the entry has no
+ * protein element or the protein has no name list.
*
- * @param al
- * - a sequence of n sequences
- * @param entries
- * a list of n uniprot entries to be analysed.
+ * @param entry
+ * UniprotEntry whose protein names are read
+ * @return the protein name(s), each followed by a single space character, or
+ * an empty string when no names are available
*/
- public void addUniprotXrefs(AlignmentI al, Vector<UniprotEntry> entries)
+ public static String getUniprotEntryDescription(UniprotEntry entry)
{
- final String dbVersion = getDbVersion();
-
- for (int i = 0; i < entries.size(); i++)
+ StringBuilder desc = new StringBuilder(32);
+ if (entry.getProtein() != null && entry.getProtein().getName() != null)
{
- UniprotEntry entry = entries.elementAt(i);
- Vector<PDBEntry> onlyPdbEntries = new Vector<PDBEntry>();
- Vector<DBRefEntry> dbxrefs = new Vector<DBRefEntry>();
-
- for (PDBEntry pdb : entry.getDbReference())
- {
- DBRefEntry dbr = new DBRefEntry();
- dbr.setSource(pdb.getType());
- dbr.setAccessionId(pdb.getId());
- dbr.setVersion(DBRefSource.UNIPROT + ":" + dbVersion);
- dbxrefs.addElement(dbr);
- if ("PDB".equals(pdb.getType()))
- {
- onlyPdbEntries.addElement(pdb);
- }
- }
-
- SequenceI sq = al.getSequenceAt(i);
- while (sq.getDatasetSequence() != null)
- {
- sq = sq.getDatasetSequence();
- }
-
- for (String accessionId : entry.getAccession())
+ for (String nm : entry.getProtein().getName())
{
- /*
- * add as uniprot whether retrieved from uniprot or uniprot_name
- */
- sq.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
- accessionId));
+ desc.append(nm).append(" "); // NOTE(review): leaves a trailing space after the last name
}
+ }
+ return desc.toString();
+ }
- for (DBRefEntry dbRef : dbxrefs)
- {
- sq.addDBRef(dbRef);
- }
- sq.setPDBId(onlyPdbEntries);
- if (entry.getFeature() != null)
- {
- for (SequenceFeature sf : entry.getFeature())
- {
- sf.setFeatureGroup("Uniprot");
- sq.addSequenceFeature(sf);
- }
- }
+ /**
+ * Builds the sequence id for a UniProt entry: the literal prefix
+ * "UniProt/Swiss-Prot" followed by every accession id and then every entry
+ * name, each preceded by BAR_DELIMITER ('|').
+ *
+ * @param entry
+ * UniprotEntry supplying the accession ids and names
+ * @return the prefix, accession id(s) and name(s) joined by '|'
+ */
+ public static String getUniprotEntryId(UniprotEntry entry)
+ {
+ StringBuilder name = new StringBuilder(32);
+ name.append("UniProt/Swiss-Prot");
+ for (String accessionId : entry.getAccession())
+ {
+ name.append(BAR_DELIMITER);
+ name.append(accessionId);
}
+ for (String n : entry.getName())
+ {
+ name.append(BAR_DELIMITER);
+ name.append(n);
+ }
+ return name.toString();
}
/*
*
* @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
*/
+ @Override
public boolean isValidReference(String accession)
{
// TODO: make the following a standard validator
/**
* return LDHA_CHICK uniprot entry
+ *
+ * @return the fixed accession "P00340", which the line above identifies as
+ * the LDHA_CHICK entry
*/
+ @Override
public String getTestQuery()
{
return "P00340";
}
+ @Override
public String getDbName()
{
return "Uniprot"; // getDbSource();