X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fdbsources%2FUniprot.java;h=69ee379c6a0b5144884860703d962a2520662757;hb=9bfda76754fb426a471e1c12c9f0ca0c4c1784bc;hp=b9fe52f607fa5d970405292dcb40478d3821d537;hpb=d80fbc33dfc7ece325c896b33c3296a075e48a30;p=jalview.git diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index b9fe52f..69ee379 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@ -20,9 +20,26 @@ */ package jalview.ws.dbsources; +import java.io.InputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.Locale; +import java.util.Vector; + +import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBElement; +import javax.xml.bind.JAXBException; +import javax.xml.stream.FactoryConfigurationError; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import com.stevesoft.pat.Regex; import jalview.bin.Cache; +import jalview.bin.Console; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; @@ -41,23 +58,6 @@ import jalview.xml.binding.uniprot.LocationType; import jalview.xml.binding.uniprot.PositionType; import jalview.xml.binding.uniprot.PropertyType; -import java.io.InputStream; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.ArrayList; -import java.util.List; -import java.util.Vector; - -import javax.xml.bind.JAXBContext; -import javax.xml.bind.JAXBElement; -import javax.xml.bind.JAXBException; -import javax.xml.stream.FactoryConfigurationError; -import javax.xml.stream.XMLInputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamReader; - -import com.stevesoft.pat.Regex; - /** * This class queries the Uniprot database for sequence data, unmarshals the * returned XML, and converts it to Jalview Sequence records (including attached @@ -144,13 +144,12 @@ public class Uniprot extends DbSourceProxyImpl "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", ""); AlignmentI al = null; - String downloadstring = getDomain() + "/uniprot/" + queries - + ".xml"; + String downloadstring = getDomain() + "/uniprot/" + queries + ".xml"; URL url = new URL(downloadstring); - HttpURLConnection urlconn = (HttpURLConnection)url.openConnection(); + HttpURLConnection urlconn = (HttpURLConnection) url.openConnection(); // anything other than 200 means we don't have data - // TODO: JAL-3882 reuse the EnsemblRestClient's fair + // TODO: JAL-3882 reuse the EnsemblRestClient's fair // use/backoff logic to retry when the server tells us to go away if (urlconn.getResponseCode() == 200) { @@ -168,7 +167,7 @@ public class Uniprot extends DbSourceProxyImpl } stopQuery(); return al; - + } catch (Exception e) { throw (e); @@ -193,21 +192,20 @@ public class Uniprot extends DbSourceProxyImpl String seqString = entry.getSequence().getValue().replaceAll("\\s*", ""); - SequenceI sequence = new Sequence(id, - seqString); + SequenceI sequence = new Sequence(id, seqString); sequence.setDescription(getUniprotEntryDescription(entry)); - + final String uniprotRecordVersion = "" + entry.getVersion(); /* * add a 'self' DBRefEntry for each accession */ final String dbVersion = getDbVersion(); List dbRefs = new ArrayList<>(); - boolean canonical=true; + boolean canonical = true; for (String accessionId : entry.getAccession()) { - DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion, - accessionId,null,canonical); - canonical=false; + DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, + uniprotRecordVersion, accessionId, null, canonical); + canonical = false; dbRefs.add(dbRef); } @@ -244,13 +242,23 @@ public class Uniprot extends DbSourceProxyImpl // remove version String[] vrs = cdsId.split("\\."); String version = vrs.length > 1 ? vrs[1] - : DBRefSource.UNIPROT + ":" + dbVersion; + : DBRefSource.UNIPROT + ":" + uniprotRecordVersion; dbr = new DBRefEntry(DBRefSource.EMBLCDS, version, vrs[0]); + // TODO: process VARIANT features to allow EMBLCDS record's product to + // match Uniprot + dbr.setCanonical(true); dbRefs.add(dbr); } } - if ("Ensembl".equals(type)) + if (type != null + && type.toLowerCase(Locale.ROOT).startsWith("ensembl")) { + // remove version + String[] vrs = dbref.getId().split("\\."); + String version = vrs.length > 1 ? vrs[1] + : DBRefSource.UNIPROT + ":" + uniprotRecordVersion; + dbr.setAccessionId(vrs[0]); + dbr.setVersion(version); /* * e.g. Uniprot accession Q9BXM7 has * @@ -263,8 +271,12 @@ public class Uniprot extends DbSourceProxyImpl "protein sequence ID"); if (cdsId != null && cdsId.trim().length() > 0) { + // remove version + String[] cdsVrs = cdsId.split("\\."); + String cdsVersion = cdsVrs.length > 1 ? cdsVrs[1] + : DBRefSource.UNIPROT + ":" + uniprotRecordVersion; dbr = new DBRefEntry(DBRefSource.ENSEMBL, - DBRefSource.UNIPROT + ":" + dbVersion, cdsId.trim()); + DBRefSource.UNIPROT + ":" + cdsVersion, cdsVrs[0]); dbRefs.add(dbr); } } @@ -281,19 +293,68 @@ public class Uniprot extends DbSourceProxyImpl LocationType location = uf.getLocation(); int start = 0; int end = 0; + String uncertain_start = null, uncertain_end = null, + uncertain_pos = null; if (location.getPosition() != null) { - start = location.getPosition().getPosition().intValue(); - end = start; + if (location.getPosition().getPosition() == null + || "unknown".equals(location.getPosition().getStatus())) + { + Console.warn( + "Ignoring single position feature with uncertain location " + + uf.getType() + ":" + getDescription(uf)); + uncertain_pos = location.getPosition().getStatus() == null + ? "unknown" + : location.getPosition().getStatus(); + } + else + { + start = location.getPosition().getPosition().intValue(); + end = start; + } } else { - start = location.getBegin().getPosition().intValue(); - end = location.getEnd().getPosition().intValue(); + if (location.getBegin().getPosition() == null) + { + Console.warn( + "Setting start position of feature with uncertain start to 1: " + + uf.getType() + ":" + getDescription(uf)); + start = sequence.getStart(); + uncertain_start = location.getBegin().getStatus(); + } + else + { + start = location.getBegin().getPosition().intValue(); + } + if (location.getEnd().getPosition() == null) + { + Console.warn( + "Setting start position of feature with uncertain start to 1: " + + uf.getType() + ":" + getDescription(uf)); + end = sequence.getEnd(); + uncertain_end = location.getEnd().getStatus(); + } + else + { + end = location.getEnd().getPosition().intValue(); + } } SequenceFeature sf = new SequenceFeature(uf.getType(), getDescription(uf), start, end, "Uniprot"); sf.setStatus(uf.getStatus()); + if (uncertain_end != null) + { + sf.setValue("end_status", uncertain_end); + } + if (uncertain_start != null) + { + sf.setValue("start_status", uncertain_start); + } + if (uncertain_pos != null) + { + sf.setValue("pos_status", uncertain_pos); + } sequence.addSequenceFeature(sf); } } @@ -512,10 +573,12 @@ public class Uniprot extends DbSourceProxyImpl XMLStreamReader streamReader = XMLInputFactory.newInstance() .createXMLStreamReader(is); javax.xml.bind.Unmarshaller um = jc.createUnmarshaller(); - JAXBElement uniprotElement = - um.unmarshal(streamReader, jalview.xml.binding.uniprot.Uniprot.class); - jalview.xml.binding.uniprot.Uniprot uniprot = uniprotElement.getValue(); - + JAXBElement uniprotElement = um + .unmarshal(streamReader, + jalview.xml.binding.uniprot.Uniprot.class); + jalview.xml.binding.uniprot.Uniprot uniprot = uniprotElement + .getValue(); + if (uniprot != null && !uniprot.getEntry().isEmpty()) { entries = uniprot.getEntry(); @@ -523,7 +586,10 @@ public class Uniprot extends DbSourceProxyImpl } catch (JAXBException | XMLStreamException | FactoryConfigurationError e) { - if (e instanceof javax.xml.bind.UnmarshalException && e.getCause()!=null && e.getCause() instanceof XMLStreamException && e.getCause().getMessage().contains("[row,col]:[1,1]")) + if (e instanceof javax.xml.bind.UnmarshalException + && e.getCause() != null + && e.getCause() instanceof XMLStreamException + && e.getCause().getMessage().contains("[row,col]:[1,1]")) { // trying to parse an empty stream return null;