X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fdbsources%2FUniprot.java;h=c9db7f2979a4b729f1d60fe4c15452bcb8331456;hb=5faac1104c41690e38e259fae5bb700236021360;hp=6f5f03315bd4099ee026f886580c6d36ab904f9e;hpb=3da878124135ff033f42d19d8733891b09e953cd;p=jalview.git diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index 6f5f033..c9db7f2 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@ -20,6 +20,24 @@ */ package jalview.ws.dbsources; +import java.io.InputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.Vector; + +import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBElement; +import javax.xml.bind.JAXBException; +import javax.xml.stream.FactoryConfigurationError; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import com.stevesoft.pat.Regex; + import jalview.bin.Cache; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; @@ -32,7 +50,6 @@ import jalview.datamodel.SequenceI; import jalview.schemes.ResidueProperties; import jalview.util.StringUtils; import jalview.ws.seqfetcher.DbSourceProxyImpl; -import jalview.xml.binding.embl.ROOT; import jalview.xml.binding.uniprot.DbReferenceType; import jalview.xml.binding.uniprot.Entry; import jalview.xml.binding.uniprot.FeatureType; @@ -40,23 +57,6 @@ import jalview.xml.binding.uniprot.LocationType; import jalview.xml.binding.uniprot.PositionType; import jalview.xml.binding.uniprot.PropertyType; -import java.io.InputStream; -import java.net.URL; -import java.net.URLConnection; -import java.util.ArrayList; -import java.util.List; -import java.util.Vector; - -import javax.xml.bind.JAXBContext; -import javax.xml.bind.JAXBElement; -import javax.xml.bind.JAXBException; -import javax.xml.stream.FactoryConfigurationError; -import javax.xml.stream.XMLInputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamReader; - -import com.stevesoft.pat.Regex; - /** * This class queries the Uniprot database for sequence data, unmarshals the * returned XML, and converts it to Jalview Sequence records (including attached @@ -139,29 +139,34 @@ public class Uniprot extends DbSourceProxyImpl startQuery(); try { - queries = queries.toUpperCase().replaceAll( + queries = queries.toUpperCase(Locale.ROOT).replaceAll( "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", ""); AlignmentI al = null; - String downloadstring = getDomain() + "/uniprot/" + queries - + ".xml"; + String downloadstring = getDomain() + "/uniprot/" + queries + ".xml"; URL url = new URL(downloadstring); - URLConnection urlconn = url.openConnection(); - InputStream istr = urlconn.getInputStream(); - List entries = getUniprotEntries(istr); - if (entries != null) + HttpURLConnection urlconn = (HttpURLConnection) url.openConnection(); + // anything other than 200 means we don't have data + // TODO: JAL-3882 reuse the EnsemblRestClient's fair + // use/backoff logic to retry when the server tells us to go away + if (urlconn.getResponseCode() == 200) { - List seqs = new ArrayList<>(); - for (Entry entry : entries) + InputStream istr = urlconn.getInputStream(); + List entries = getUniprotEntries(istr); + if (entries != null) { - seqs.add(uniprotEntryToSequence(entry)); + List seqs = new ArrayList<>(); + for (Entry entry : entries) + { + seqs.add(uniprotEntryToSequence(entry)); + } + al = new Alignment(seqs.toArray(new SequenceI[seqs.size()])); } - al = new Alignment(seqs.toArray(new SequenceI[seqs.size()])); } - stopQuery(); return al; + } catch (Exception e) { throw (e); @@ -186,19 +191,20 @@ public class Uniprot extends DbSourceProxyImpl String seqString = entry.getSequence().getValue().replaceAll("\\s*", ""); - SequenceI sequence = new Sequence(id, - seqString); + SequenceI sequence = new Sequence(id, seqString); sequence.setDescription(getUniprotEntryDescription(entry)); - + final String uniprotRecordVersion = "" + entry.getVersion(); /* * add a 'self' DBRefEntry for each accession */ final String dbVersion = getDbVersion(); List dbRefs = new ArrayList<>(); + boolean canonical = true; for (String accessionId : entry.getAccession()) { - DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion, - accessionId); + DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, + uniprotRecordVersion, accessionId, null, canonical); + canonical = false; dbRefs.add(dbRef); } @@ -235,13 +241,23 @@ public class Uniprot extends DbSourceProxyImpl // remove version String[] vrs = cdsId.split("\\."); String version = vrs.length > 1 ? vrs[1] - : DBRefSource.UNIPROT + ":" + dbVersion; + : DBRefSource.UNIPROT + ":" + uniprotRecordVersion; dbr = new DBRefEntry(DBRefSource.EMBLCDS, version, vrs[0]); + // TODO: process VARIANT features to allow EMBLCDS record's product to + // match Uniprot + dbr.setCanonical(true); dbRefs.add(dbr); } } - if ("Ensembl".equals(type)) + if (type != null + && type.toLowerCase(Locale.ROOT).startsWith("ensembl")) { + // remove version + String[] vrs = dbref.getId().split("\\."); + String version = vrs.length > 1 ? vrs[1] + : DBRefSource.UNIPROT + ":" + uniprotRecordVersion; + dbr.setAccessionId(vrs[0]); + dbr.setVersion(version); /* * e.g. Uniprot accession Q9BXM7 has * @@ -254,8 +270,12 @@ public class Uniprot extends DbSourceProxyImpl "protein sequence ID"); if (cdsId != null && cdsId.trim().length() > 0) { + // remove version + String[] cdsVrs = cdsId.split("\\."); + String cdsVersion = cdsVrs.length > 1 ? cdsVrs[1] + : DBRefSource.UNIPROT + ":" + uniprotRecordVersion; dbr = new DBRefEntry(DBRefSource.ENSEMBL, - DBRefSource.UNIPROT + ":" + dbVersion, cdsId.trim()); + DBRefSource.UNIPROT + ":" + cdsVersion, cdsVrs[0]); dbRefs.add(dbr); } } @@ -503,10 +523,12 @@ public class Uniprot extends DbSourceProxyImpl XMLStreamReader streamReader = XMLInputFactory.newInstance() .createXMLStreamReader(is); javax.xml.bind.Unmarshaller um = jc.createUnmarshaller(); - JAXBElement uniprotElement = - um.unmarshal(streamReader, jalview.xml.binding.uniprot.Uniprot.class); - jalview.xml.binding.uniprot.Uniprot uniprot = uniprotElement.getValue(); - + JAXBElement uniprotElement = um + .unmarshal(streamReader, + jalview.xml.binding.uniprot.Uniprot.class); + jalview.xml.binding.uniprot.Uniprot uniprot = uniprotElement + .getValue(); + if (uniprot != null && !uniprot.getEntry().isEmpty()) { entries = uniprot.getEntry(); @@ -514,6 +536,14 @@ public class Uniprot extends DbSourceProxyImpl } catch (JAXBException | XMLStreamException | FactoryConfigurationError e) { + if (e instanceof javax.xml.bind.UnmarshalException + && e.getCause() != null + && e.getCause() instanceof XMLStreamException + && e.getCause().getMessage().contains("[row,col]:[1,1]")) + { + // trying to parse an empty stream + return null; + } e.printStackTrace(); } return entries;