X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fdbsources%2FUniprot.java;h=69ee379c6a0b5144884860703d962a2520662757;hb=9bfda76754fb426a471e1c12c9f0ca0c4c1784bc;hp=371eb50bc227365a4e74ae147aa73ea94a0d6261;hpb=57738a1f3c19b1c3a00bd3ac5108f8cd0af32f99;p=jalview.git diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index 371eb50..69ee379 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@ -20,9 +20,26 @@ */ package jalview.ws.dbsources; +import java.io.InputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.Locale; +import java.util.Vector; + +import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBElement; +import javax.xml.bind.JAXBException; +import javax.xml.stream.FactoryConfigurationError; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import com.stevesoft.pat.Regex; import jalview.bin.Cache; +import jalview.bin.Console; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; @@ -41,23 +58,6 @@ import jalview.xml.binding.uniprot.LocationType; import jalview.xml.binding.uniprot.PositionType; import jalview.xml.binding.uniprot.PropertyType; -import java.io.InputStream; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.ArrayList; -import java.util.List; -import java.util.Vector; - -import javax.xml.bind.JAXBContext; -import javax.xml.bind.JAXBElement; -import javax.xml.bind.JAXBException; -import javax.xml.stream.FactoryConfigurationError; -import javax.xml.stream.XMLInputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamReader; - -import com.stevesoft.pat.Regex; - /** * This class queries the Uniprot database for sequence data, unmarshals the * returned XML, and converts it to Jalview Sequence records (including attached @@ -194,7 +194,7 @@ public class Uniprot extends DbSourceProxyImpl SequenceI sequence = new Sequence(id, seqString); sequence.setDescription(getUniprotEntryDescription(entry)); - + final String uniprotRecordVersion = "" + entry.getVersion(); /* * add a 'self' DBRefEntry for each accession */ @@ -203,8 +203,8 @@ public class Uniprot extends DbSourceProxyImpl boolean canonical = true; for (String accessionId : entry.getAccession()) { - DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion, - accessionId, null, canonical); + DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, + uniprotRecordVersion, accessionId, null, canonical); canonical = false; dbRefs.add(dbRef); } @@ -242,13 +242,23 @@ public class Uniprot extends DbSourceProxyImpl // remove version String[] vrs = cdsId.split("\\."); String version = vrs.length > 1 ? vrs[1] - : DBRefSource.UNIPROT + ":" + dbVersion; + : DBRefSource.UNIPROT + ":" + uniprotRecordVersion; dbr = new DBRefEntry(DBRefSource.EMBLCDS, version, vrs[0]); + // TODO: process VARIANT features to allow EMBLCDS record's product to + // match Uniprot + dbr.setCanonical(true); dbRefs.add(dbr); } } - if ("Ensembl".equals(type)) + if (type != null + && type.toLowerCase(Locale.ROOT).startsWith("ensembl")) { + // remove version + String[] vrs = dbref.getId().split("\\."); + String version = vrs.length > 1 ? vrs[1] + : DBRefSource.UNIPROT + ":" + uniprotRecordVersion; + dbr.setAccessionId(vrs[0]); + dbr.setVersion(version); /* * e.g. Uniprot accession Q9BXM7 has * @@ -261,8 +271,12 @@ public class Uniprot extends DbSourceProxyImpl "protein sequence ID"); if (cdsId != null && cdsId.trim().length() > 0) { + // remove version + String[] cdsVrs = cdsId.split("\\."); + String cdsVersion = cdsVrs.length > 1 ? cdsVrs[1] + : DBRefSource.UNIPROT + ":" + uniprotRecordVersion; dbr = new DBRefEntry(DBRefSource.ENSEMBL, - DBRefSource.UNIPROT + ":" + dbVersion, cdsId.trim()); + DBRefSource.UNIPROT + ":" + cdsVersion, cdsVrs[0]); dbRefs.add(dbr); } } @@ -279,19 +293,68 @@ public class Uniprot extends DbSourceProxyImpl LocationType location = uf.getLocation(); int start = 0; int end = 0; + String uncertain_start = null, uncertain_end = null, + uncertain_pos = null; if (location.getPosition() != null) { - start = location.getPosition().getPosition().intValue(); - end = start; + if (location.getPosition().getPosition() == null + || "unknown".equals(location.getPosition().getStatus())) + { + Console.warn( + "Ignoring single position feature with uncertain location " + + uf.getType() + ":" + getDescription(uf)); + uncertain_pos = location.getPosition().getStatus() == null + ? "unknown" + : location.getPosition().getStatus(); + } + else + { + start = location.getPosition().getPosition().intValue(); + end = start; + } } else { - start = location.getBegin().getPosition().intValue(); - end = location.getEnd().getPosition().intValue(); + if (location.getBegin().getPosition() == null) + { + Console.warn( + "Setting start position of feature with uncertain start to 1: " + + uf.getType() + ":" + getDescription(uf)); + start = sequence.getStart(); + uncertain_start = location.getBegin().getStatus(); + } + else + { + start = location.getBegin().getPosition().intValue(); + } + if (location.getEnd().getPosition() == null) + { + Console.warn( + "Setting start position of feature with uncertain start to 1: " + + uf.getType() + ":" + getDescription(uf)); + end = sequence.getEnd(); + uncertain_end = location.getEnd().getStatus(); + } + else + { + end = location.getEnd().getPosition().intValue(); + } } SequenceFeature sf = new SequenceFeature(uf.getType(), getDescription(uf), start, end, "Uniprot"); sf.setStatus(uf.getStatus()); + if (uncertain_end != null) + { + sf.setValue("end_status", uncertain_end); + } + if (uncertain_start != null) + { + sf.setValue("start_status", uncertain_start); + } + if (uncertain_pos != null) + { + sf.setValue("pos_status", uncertain_pos); + } sequence.addSequenceFeature(sf); } }