X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;fp=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=3ec63201bf3f63c9ba31acf12d1a6e371eaa35e2;hb=586ade46bdcd05ff028a1cff82c3c527326d28ec;hp=d0656667f7891beaf88d1c2be107384235e58ebb;hpb=adcef27f5747b4e70e89a56c3735bc3afb8ce9bf;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index d065666..3ec6320 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -20,44 +20,17 @@ */ package jalview.ws.sifts; -import jalview.analysis.AlignSeq; -import jalview.analysis.scoremodels.ScoreMatrix; -import jalview.analysis.scoremodels.ScoreModels; -import jalview.api.DBRefEntryI; -import jalview.api.SiftsClientI; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; -import jalview.datamodel.SequenceI; -import jalview.io.StructureFile; -import jalview.schemes.ResidueProperties; -import jalview.structure.StructureMapping; -import jalview.util.Comparison; -import jalview.util.DBRefUtils; -import jalview.util.Format; -import jalview.xml.binding.sifts.Entry; -import jalview.xml.binding.sifts.Entry.Entity; -import jalview.xml.binding.sifts.Entry.Entity.Segment; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; - import java.io.File; import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.net.URL; import java.net.URLConnection; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -67,10 +40,33 @@ import java.util.TreeMap; import java.util.zip.GZIPInputStream; import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBElement; import javax.xml.bind.Unmarshaller; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamReader; +import jalview.analysis.AlignSeq; +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.api.DBRefEntryI; +import jalview.api.SiftsClientI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceI; +import jalview.io.StructureFile; +import jalview.schemes.ResidueProperties; +import jalview.structure.StructureMapping; +import jalview.util.Comparison; +import jalview.util.DBRefUtils; +import jalview.util.Format; +import jalview.util.Platform; +import jalview.xml.binding.sifts.Entry; +import jalview.xml.binding.sifts.Entry.Entity; +import jalview.xml.binding.sifts.Entry.Entity.Segment; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; import mc_view.Atom; import mc_view.PDBChain; @@ -100,8 +96,6 @@ public class SiftsClient implements SiftsClientI */ private jalview.datamodel.Mapping seqFromPdbMapping; - private static final int BUFFER_SIZE = 4096; - public static final int UNASSIGNED = Integer.MIN_VALUE; private static final int PDB_RES_POS = 0; @@ -116,10 +110,15 @@ public class SiftsClient implements SiftsClientI private final static String NEWLINE = System.lineSeparator(); + private static final boolean GET_STREAM = false; + private static final boolean CACHE_FILE = true; + private String curSourceDBRef; private HashSet curDBRefAccessionIdsString; + private boolean doCache = false; + private enum CoordinateSys { UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe"); @@ -164,8 +163,31 @@ public class SiftsClient implements SiftsClientI { this.pdb = pdb; this.pdbId = pdb.getId(); - File siftsFile = getSiftsFile(pdbId); - siftsEntry = parseSIFTs(siftsFile); + if (doCache) { + File siftsFile = getSiftsFile(pdbId); + siftsEntry = parseSIFTs(siftsFile); + } else { + siftsEntry = parseSIFTSStreamFor(pdbId); + } + } + + /** + * A more streamlined version of SIFT reading that allows for streaming of the data. + * + * @param pdbId + * @return + * @throws SiftsException + */ + private static Entry parseSIFTSStreamFor(String pdbId) throws SiftsException + { + try + { + InputStream is = (InputStream) downloadSifts(pdbId, GET_STREAM); + return parseSIFTs(is); + } catch (Exception e) + { + throw new SiftsException(e.getMessage()); + } } /** @@ -179,19 +201,25 @@ public class SiftsClient implements SiftsClientI */ private Entry parseSIFTs(File siftFile) throws SiftsException { - try (InputStream in = new FileInputStream(siftFile); - GZIPInputStream gzis = new GZIPInputStream(in);) + try (InputStream in = new FileInputStream(siftFile)) { + return parseSIFTs(in); + } catch (Exception e) + { + e.printStackTrace(); + throw new SiftsException(e.getMessage()); + } + } + + private static Entry parseSIFTs(InputStream in) throws Exception { + try (GZIPInputStream gzis = new GZIPInputStream(in);) { // System.out.println("File : " + siftFile.getAbsolutePath()); JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts"); XMLStreamReader streamReader = XMLInputFactory.newInstance() .createXMLStreamReader(gzis); Unmarshaller um = jc.createUnmarshaller(); - return (Entry) um.unmarshal(streamReader); - } catch (Exception e) - { - e.printStackTrace(); - throw new SiftsException(e.getMessage()); + JAXBElement jbe = um.unmarshal(streamReader, Entry.class); + return jbe.getValue(); } } @@ -221,14 +249,14 @@ public class SiftsClient implements SiftsClientI // The line below is required for unit testing... don't comment it out!!! System.out.println(">>> SIFTS File already downloaded for " + pdbId); - if (isFileOlderThanThreshold(siftsFile, + if (Platform.isFileOlderThanThreshold(siftsFile, SiftsSettings.getCacheThresholdInDays())) { File oldSiftsFile = new File(siftsFileName + "_old"); siftsFile.renameTo(oldSiftsFile); try { - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + siftsFile = downloadSiftsFile(pdbId); oldSiftsFile.delete(); return siftsFile; } catch (IOException e) @@ -245,7 +273,7 @@ public class SiftsClient implements SiftsClientI } try { - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + siftsFile = downloadSiftsFile(pdbId); } catch (IOException e) { throw new SiftsException(e.getMessage()); @@ -254,35 +282,6 @@ public class SiftsClient implements SiftsClientI } /** - * This method enables checking if a cached file has exceeded a certain - * threshold(in days) - * - * @param file - * the cached file - * @param noOfDays - * the threshold in days - * @return - */ - public static boolean isFileOlderThanThreshold(File file, int noOfDays) - { - Path filePath = file.toPath(); - BasicFileAttributes attr; - int diffInDays = 0; - try - { - attr = Files.readAttributes(filePath, BasicFileAttributes.class); - diffInDays = (int) ((new Date().getTime() - - attr.lastModifiedTime().toMillis()) - / (1000 * 60 * 60 * 24)); - // System.out.println("Diff in days : " + diffInDays); - } catch (IOException e) - { - e.printStackTrace(); - } - return noOfDays <= diffInDays; - } - - /** * Download a SIFTs XML file for a given PDB Id from an FTP repository * * @param pdbId @@ -292,39 +291,48 @@ public class SiftsClient implements SiftsClientI */ public static File downloadSiftsFile(String pdbId) throws SiftsException, IOException + { + return (File) downloadSifts(pdbId, CACHE_FILE); + } + + /** + * Download SIFTs XML with the option to cache a file or to get a stream. + * + * @param pdbId + * @param asFile + * @return + * @throws IOException + */ + private static Object downloadSifts(String pdbId, boolean asFile) throws IOException { + pdbId = pdbId.toLowerCase(); if (pdbId.contains(".cif")) { pdbId = pdbId.replace(".cif", ""); } String siftFile = pdbId + ".xml.gz"; - String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; - String downloadedSiftsFile = SiftsSettings.getSiftDownloadDirectory() - + siftFile; - File siftsDownloadDir = new File( - SiftsSettings.getSiftDownloadDirectory()); - if (!siftsDownloadDir.exists()) + + File downloadTo = null; + if (asFile) { - siftsDownloadDir.mkdirs(); + downloadTo = new File( + SiftsSettings.getSiftDownloadDirectory() + siftFile); + File siftsDownloadDir = new File(SiftsSettings.getSiftDownloadDirectory()); + if (!siftsDownloadDir.exists()) + { + siftsDownloadDir.mkdirs(); + } } - // System.out.println(">> Download ftp url : " + siftsFileFTPURL); - // long now = System.currentTimeMillis(); + String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; URL url = new URL(siftsFileFTPURL); URLConnection conn = url.openConnection(); - InputStream inputStream = conn.getInputStream(); - FileOutputStream outputStream = new FileOutputStream( - downloadedSiftsFile); - byte[] buffer = new byte[BUFFER_SIZE]; - int bytesRead = -1; - while ((bytesRead = inputStream.read(buffer)) != -1) - { - outputStream.write(buffer, 0, bytesRead); - } - outputStream.close(); - inputStream.close(); - // System.out.println(">>> File downloaded : " + downloadedSiftsFile - // + " took " + (System.currentTimeMillis() - now) + "ms"); - return new File(downloadedSiftsFile); + InputStream is = conn.getInputStream(); + if (!asFile) + return is; + // This is MUCH more efficent in JavaScript, as we already have the bytes + Platform.streamToFile(is, downloadTo); + is.close(); + return downloadTo; } /** @@ -631,7 +639,7 @@ public class SiftsClient implements SiftsClientI for (Residue residue : residues) { boolean isObserved = isResidueObserved(residue); - int pdbeIndex = getLeadingIntegerValue(residue.getDbResNum(), + int pdbeIndex = Platform.getLeadingIntegerValue(residue.getDbResNum(), UNASSIGNED); int currSeqIndex = UNASSIGNED; List cRefDbs = residue.getCrossRefDb(); @@ -643,7 +651,7 @@ public class SiftsClient implements SiftsClientI pdbRefDb = cRefDb; if (firstPDBResNum == UNASSIGNED) { - firstPDBResNum = getLeadingIntegerValue(cRefDb.getDbResNum(), + firstPDBResNum = Platform.getLeadingIntegerValue(cRefDb.getDbResNum(), UNASSIGNED); } else @@ -658,7 +666,7 @@ public class SiftsClient implements SiftsClientI if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName()) && isAccessionMatched(cRefDb.getDbAccessionId())) { - currSeqIndex = getLeadingIntegerValue(cRefDb.getDbResNum(), + currSeqIndex = Platform.getLeadingIntegerValue(cRefDb.getDbResNum(), UNASSIGNED); if (pdbRefDb != null) { @@ -708,9 +716,9 @@ public class SiftsClient implements SiftsClientI { int resNum = (pdbRefDb == null) - ? getLeadingIntegerValue(residue.getDbResNum(), + ? Platform.getLeadingIntegerValue(residue.getDbResNum(), UNASSIGNED) - : getLeadingIntegerValue(pdbRefDb.getDbResNum(), + : Platform.getLeadingIntegerValue(pdbRefDb.getDbResNum(), UNASSIGNED); if (isObserved) @@ -731,29 +739,6 @@ public class SiftsClient implements SiftsClientI } /** - * Get the leading integer part of a string that begins with an integer. - * - * @param input - * - the string input to process - * @param failValue - * - value returned if unsuccessful - * @return - */ - static int getLeadingIntegerValue(String input, int failValue) - { - if (input == null) - { - return failValue; - } - String[] parts = input.split("(?=\\D)(?<=\\d)"); - if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+")) - { - return Integer.valueOf(parts[0]); - } - return failValue; - } - - /** * * @param chainId * Target chain to populate mapping of its atom positions.