*/
package jalview.ws.dbsources;
+import java.io.InputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Locale;
+import java.util.Vector;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBElement;
+import javax.xml.bind.JAXBException;
+import javax.xml.stream.FactoryConfigurationError;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+import com.stevesoft.pat.Regex;
import jalview.bin.Cache;
+import jalview.bin.Console;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.xml.binding.uniprot.PositionType;
import jalview.xml.binding.uniprot.PropertyType;
-import java.io.InputStream;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Vector;
-
-import javax.xml.bind.JAXBContext;
-import javax.xml.bind.JAXBElement;
-import javax.xml.bind.JAXBException;
-import javax.xml.stream.FactoryConfigurationError;
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-
-import com.stevesoft.pat.Regex;
-
/**
* This class queries the Uniprot database for sequence data, unmarshals the
* returned XML, and converts it to Jalview Sequence records (including attached
SequenceI sequence = new Sequence(id, seqString);
sequence.setDescription(getUniprotEntryDescription(entry));
-
+ final String uniprotRecordVersion = "" + entry.getVersion();
/*
* add a 'self' DBRefEntry for each accession
*/
boolean canonical = true;
for (String accessionId : entry.getAccession())
{
- DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
- accessionId, null, canonical);
+ DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT,
+ uniprotRecordVersion, accessionId, null, canonical);
canonical = false;
dbRefs.add(dbRef);
}
// remove version
String[] vrs = cdsId.split("\\.");
String version = vrs.length > 1 ? vrs[1]
- : DBRefSource.UNIPROT + ":" + dbVersion;
+ : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
dbr = new DBRefEntry(DBRefSource.EMBLCDS, version, vrs[0]);
+ // TODO: process VARIANT features to allow EMBLCDS record's product to
+ // match Uniprot
+ dbr.setCanonical(true);
dbRefs.add(dbr);
}
}
- if ("Ensembl".equals(type))
+ if (type != null
+ && type.toLowerCase(Locale.ROOT).startsWith("ensembl"))
{
+ // remove version
+ String[] vrs = dbref.getId().split("\\.");
+ String version = vrs.length > 1 ? vrs[1]
+ : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
+ dbr.setAccessionId(vrs[0]);
+ dbr.setVersion(version);
/*
* e.g. Uniprot accession Q9BXM7 has
* <dbReference type="Ensembl" id="ENST00000321556">
"protein sequence ID");
if (cdsId != null && cdsId.trim().length() > 0)
{
+ // remove version
+ String[] cdsVrs = cdsId.split("\\.");
+ String cdsVersion = cdsVrs.length > 1 ? cdsVrs[1]
+ : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
dbr = new DBRefEntry(DBRefSource.ENSEMBL,
- DBRefSource.UNIPROT + ":" + dbVersion, cdsId.trim());
+ DBRefSource.UNIPROT + ":" + cdsVersion, cdsVrs[0]);
dbRefs.add(dbr);
}
}
LocationType location = uf.getLocation();
int start = 0;
int end = 0;
+ String uncertain_start = null, uncertain_end = null,
+ uncertain_pos = null;
if (location.getPosition() != null)
{
- start = location.getPosition().getPosition().intValue();
- end = start;
+ if (location.getPosition().getPosition() == null
+ || !"unknown".equals(location.getPosition().getStatus()))
+ {
+ Console.warn(
+ "Ignoring single position feature with uncertain location "
+ + uf.getType() + ":" + getDescription(uf));
+ uncertain_pos = location.getPosition().getStatus() == null
+ ? "unknown"
+ : location.getPosition().getStatus();
+ }
+ else
+ {
+ start = location.getPosition().getPosition().intValue();
+ end = start;
+ }
}
else
{
- start = location.getBegin().getPosition().intValue();
- end = location.getEnd().getPosition().intValue();
+ if (location.getBegin().getPosition() == null)
+ {
+ Console.warn(
+ "Setting start position of feature with uncertain start to 1: "
+ + uf.getType() + ":" + getDescription(uf));
+ start = sequence.getStart();
+ uncertain_start = location.getBegin().getStatus();
+ }
+ else
+ {
+ start = location.getBegin().getPosition().intValue();
+ }
+ if (location.getEnd().getPosition() == null)
+ {
+ Console.warn(
+ "Setting start position of feature with uncertain start to 1: "
+ + uf.getType() + ":" + getDescription(uf));
+ end = sequence.getEnd();
+ uncertain_end = location.getEnd().getStatus();
+ }
+ else
+ {
+ end = location.getEnd().getPosition().intValue();
+ }
}
SequenceFeature sf = new SequenceFeature(uf.getType(),
getDescription(uf), start, end, "Uniprot");
sf.setStatus(uf.getStatus());
+ if (uncertain_end != null)
+ {
+ sf.setValue("end_status", uncertain_end);
+ }
+ if (uncertain_start != null)
+ {
+ sf.setValue("start_status", uncertain_start);
+ }
+ if (uncertain_pos != null)
+ {
+ sf.setValue("pos_status", uncertain_pos);
+ }
sequence.addSequenceFeature(sf);
}
}