X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=892ebd89c5f525b460dbcbb29b1bd0e0350f0a25;hb=d043ce47fc710d3eb2629ba926a8a7417bd67d8c;hp=c9c4618e15bb6b85c80c8513e1076507877c6ad2;hpb=d97bf67918528e7c2e1601a537d78eda2fbd2a5d;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index c9c4618..892ebd8 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -20,28 +20,6 @@ */ package jalview.ws.sifts; -import jalview.analysis.AlignSeq; -import jalview.analysis.scoremodels.ScoreMatrix; -import jalview.analysis.scoremodels.ScoreModels; -import jalview.api.DBRefEntryI; -import jalview.api.SiftsClientI; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; -import jalview.datamodel.SequenceI; -import jalview.io.StructureFile; -import jalview.schemes.ResidueProperties; -import jalview.structure.StructureMapping; -import jalview.util.Comparison; -import jalview.util.DBRefUtils; -import jalview.util.Format; -import jalview.xml.binding.sifts.Entry; -import jalview.xml.binding.sifts.Entry.Entity; -import jalview.xml.binding.sifts.Entry.Entity.Segment; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; - import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -61,18 +39,43 @@ import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.TreeMap; import java.util.zip.GZIPInputStream; import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBElement; import javax.xml.bind.Unmarshaller; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamReader; -import MCview.Atom; -import MCview.PDBChain; +import jalview.analysis.AlignSeq; +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.api.DBRefEntryI; +import jalview.api.SiftsClientI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceI; +import jalview.io.BackupFiles; +import jalview.io.StructureFile; +import jalview.schemes.ResidueProperties; +import jalview.structure.StructureMapping; +import jalview.util.Comparison; +import jalview.util.DBRefUtils; +import jalview.util.Format; +import jalview.util.Platform; +import jalview.xml.binding.sifts.Entry; +import jalview.xml.binding.sifts.Entry.Entity; +import jalview.xml.binding.sifts.Entry.Entity.Segment; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; +import mc_view.Atom; +import mc_view.PDBChain; public class SiftsClient implements SiftsClientI { @@ -116,9 +119,12 @@ public class SiftsClient implements SiftsClientI private final static String NEWLINE = System.lineSeparator(); + private static final boolean GET_STREAM = false; + private static final boolean CACHE_FILE = true; private String curSourceDBRef; private HashSet curDBRefAccessionIdsString; + private boolean doCache = false; private enum CoordinateSys { @@ -164,8 +170,31 @@ public class SiftsClient implements SiftsClientI { this.pdb = pdb; this.pdbId = pdb.getId(); - File siftsFile = getSiftsFile(pdbId); - siftsEntry = parseSIFTs(siftsFile); + if (doCache) { + File siftsFile = getSiftsFile(pdbId); + siftsEntry = parseSIFTs(siftsFile); + } else { + siftsEntry = parseSIFTSStreamFor(pdbId); + } + } + + /** + * A more streamlined version of SIFT reading that allows for streaming of the data. + * + * @param pdbId + * @return + * @throws SiftsException + */ + private static Entry parseSIFTSStreamFor(String pdbId) throws SiftsException + { + try + { + InputStream is = (InputStream) downloadSifts(pdbId, GET_STREAM); + return parseSIFTs(is); + } catch (Exception e) + { + throw new SiftsException(e.getMessage()); + } } /** @@ -179,19 +208,25 @@ public class SiftsClient implements SiftsClientI */ private Entry parseSIFTs(File siftFile) throws SiftsException { - try (InputStream in = new FileInputStream(siftFile); - GZIPInputStream gzis = new GZIPInputStream(in);) + try (InputStream in = new FileInputStream(siftFile)) { + return parseSIFTs(in); + } catch (Exception e) + { + e.printStackTrace(); + throw new SiftsException(e.getMessage()); + } + } + + private static Entry parseSIFTs(InputStream in) throws Exception { + try (GZIPInputStream gzis = new GZIPInputStream(in);) { // System.out.println("File : " + siftFile.getAbsolutePath()); JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts"); XMLStreamReader streamReader = XMLInputFactory.newInstance() .createXMLStreamReader(gzis); Unmarshaller um = jc.createUnmarshaller(); - return (Entry) um.unmarshal(streamReader); - } catch (Exception e) - { - e.printStackTrace(); - throw new SiftsException(e.getMessage()); + JAXBElement jbe = um.unmarshal(streamReader, Entry.class); + return jbe.getValue(); } } @@ -214,27 +249,27 @@ public class SiftsClient implements SiftsClientI } String siftsFileName = SiftsSettings.getSiftDownloadDirectory() - + pdbId.toLowerCase() + ".xml.gz"; + + pdbId.toLowerCase(Locale.ROOT) + ".xml.gz"; File siftsFile = new File(siftsFileName); if (siftsFile.exists()) { // The line below is required for unit testing... don't comment it out!!! System.out.println(">>> SIFTS File already downloaded for " + pdbId); - if (isFileOlderThanThreshold(siftsFile, + if (Platform.isFileOlderThanThreshold(siftsFile, SiftsSettings.getCacheThresholdInDays())) { File oldSiftsFile = new File(siftsFileName + "_old"); - siftsFile.renameTo(oldSiftsFile); + BackupFiles.moveFileToFile(siftsFile, oldSiftsFile); try { - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + siftsFile = downloadSiftsFile(pdbId.toLowerCase(Locale.ROOT)); oldSiftsFile.delete(); return siftsFile; } catch (IOException e) { e.printStackTrace(); - oldSiftsFile.renameTo(siftsFile); + BackupFiles.moveFileToFile(oldSiftsFile, siftsFile); return new File(siftsFileName); } } @@ -245,7 +280,7 @@ public class SiftsClient implements SiftsClientI } try { - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + siftsFile = downloadSiftsFile(pdbId.toLowerCase(Locale.ROOT)); } catch (IOException e) { throw new SiftsException(e.getMessage()); @@ -254,35 +289,6 @@ public class SiftsClient implements SiftsClientI } /** - * This method enables checking if a cached file has exceeded a certain - * threshold(in days) - * - * @param file - * the cached file - * @param noOfDays - * the threshold in days - * @return - */ - public static boolean isFileOlderThanThreshold(File file, int noOfDays) - { - Path filePath = file.toPath(); - BasicFileAttributes attr; - int diffInDays = 0; - try - { - attr = Files.readAttributes(filePath, BasicFileAttributes.class); - diffInDays = (int) ((new Date().getTime() - - attr.lastModifiedTime().toMillis()) - / (1000 * 60 * 60 * 24)); - // System.out.println("Diff in days : " + diffInDays); - } catch (IOException e) - { - e.printStackTrace(); - } - return noOfDays <= diffInDays; - } - - /** * Download a SIFTs XML file for a given PDB Id from an FTP repository * * @param pdbId @@ -292,39 +298,48 @@ public class SiftsClient implements SiftsClientI */ public static File downloadSiftsFile(String pdbId) throws SiftsException, IOException + { + return (File) downloadSifts(pdbId, CACHE_FILE); + } + + /** + * Download SIFTs XML with the option to cache a file or to get a stream. + * + * @param pdbId + * @param asFile + * @return + * @throws IOException + */ + private static Object downloadSifts(String pdbId, boolean asFile) throws IOException { + pdbId = pdbId.toLowerCase(Locale.ROOT); if (pdbId.contains(".cif")) { pdbId = pdbId.replace(".cif", ""); } String siftFile = pdbId + ".xml.gz"; - String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; - String downloadedSiftsFile = SiftsSettings.getSiftDownloadDirectory() - + siftFile; - File siftsDownloadDir = new File( - SiftsSettings.getSiftDownloadDirectory()); - if (!siftsDownloadDir.exists()) + File downloadTo = null; + if (asFile) { - siftsDownloadDir.mkdirs(); + downloadTo = new File( + SiftsSettings.getSiftDownloadDirectory() + siftFile); + File siftsDownloadDir = new File(SiftsSettings.getSiftDownloadDirectory()); + if (!siftsDownloadDir.exists()) + { + siftsDownloadDir.mkdirs(); + } } - // System.out.println(">> Download ftp url : " + siftsFileFTPURL); - // long now = System.currentTimeMillis(); + + String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; URL url = new URL(siftsFileFTPURL); URLConnection conn = url.openConnection(); - InputStream inputStream = conn.getInputStream(); - FileOutputStream outputStream = new FileOutputStream( - downloadedSiftsFile); - byte[] buffer = new byte[BUFFER_SIZE]; - int bytesRead = -1; - while ((bytesRead = inputStream.read(buffer)) != -1) - { - outputStream.write(buffer, 0, bytesRead); - } - outputStream.close(); - inputStream.close(); - // System.out.println(">>> File downloaded : " + downloadedSiftsFile - // + " took " + (System.currentTimeMillis() - now) + "ms"); - return new File(downloadedSiftsFile); + InputStream is = conn.getInputStream(); + if (!asFile) + return is; + // This is MUCH more efficent in JavaScript, as we already have the bytes + Platform.streamToFile(is, downloadTo); + is.close(); + return downloadTo; } /** @@ -337,7 +352,7 @@ public class SiftsClient implements SiftsClientI public static boolean deleteSiftsFileByPDBId(String pdbId) { File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() - + pdbId.toLowerCase() + ".xml.gz"); + + pdbId.toLowerCase(Locale.ROOT) + ".xml.gz"); if (siftsFile.exists()) { return siftsFile.delete(); @@ -411,8 +426,8 @@ public class SiftsClient implements SiftsClientI .getMapRegion(); for (MapRegion mapRegion : mapRegions) { - accessions - .add(mapRegion.getDb().getDbAccessionId().toLowerCase()); + accessions.add(mapRegion.getDb().getDbAccessionId() + .toLowerCase(Locale.ROOT)); } } } @@ -482,9 +497,11 @@ public class SiftsClient implements SiftsClientI HashSet dbRefAccessionIdsString = new HashSet(); for (DBRefEntry dbref : seq.getDBRefs()) { - dbRefAccessionIdsString.add(dbref.getAccessionId().toLowerCase()); + dbRefAccessionIdsString + .add(dbref.getAccessionId().toLowerCase(Locale.ROOT)); } - dbRefAccessionIdsString.add(sourceDBRef.getAccessionId().toLowerCase()); + dbRefAccessionIdsString + .add(sourceDBRef.getAccessionId().toLowerCase(Locale.ROOT)); curDBRefAccessionIdsString = dbRefAccessionIdsString; curSourceDBRef = sourceDBRef.getAccessionId(); @@ -631,7 +648,7 @@ public class SiftsClient implements SiftsClientI for (Residue residue : residues) { boolean isObserved = isResidueObserved(residue); - int pdbeIndex = getLeadingIntegerValue(residue.getDbResNum(), + int pdbeIndex = Platform.getLeadingIntegerValue(residue.getDbResNum(), UNASSIGNED); int currSeqIndex = UNASSIGNED; List cRefDbs = residue.getCrossRefDb(); @@ -643,7 +660,7 @@ public class SiftsClient implements SiftsClientI pdbRefDb = cRefDb; if (firstPDBResNum == UNASSIGNED) { - firstPDBResNum = getLeadingIntegerValue(cRefDb.getDbResNum(), + firstPDBResNum = Platform.getLeadingIntegerValue(cRefDb.getDbResNum(), UNASSIGNED); } else @@ -658,7 +675,7 @@ public class SiftsClient implements SiftsClientI if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName()) && isAccessionMatched(cRefDb.getDbAccessionId())) { - currSeqIndex = getLeadingIntegerValue(cRefDb.getDbResNum(), + currSeqIndex = Platform.getLeadingIntegerValue(cRefDb.getDbResNum(), UNASSIGNED); if (pdbRefDb != null) { @@ -690,6 +707,13 @@ public class SiftsClient implements SiftsClientI ++nonObservedShiftIndex; } } + if (currSeqIndex == UNASSIGNED) + { + // change in logic - unobserved residues with no currSeqIndex + // corresponding are still counted in both nonObservedShiftIndex and + // pdbeIndex... + continue; + } // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) // // true // numbering @@ -701,55 +725,29 @@ public class SiftsClient implements SiftsClientI { int resNum = (pdbRefDb == null) - ? getLeadingIntegerValue(residue.getDbResNum(), + ? Platform.getLeadingIntegerValue(residue.getDbResNum(), UNASSIGNED) - : getLeadingIntegerValue(pdbRefDb.getDbResNum(), + : Platform.getLeadingIntegerValue(pdbRefDb.getDbResNum(), UNASSIGNED); - if (isResidueObserved(residue) - || seqCoordSys == CoordinateSys.UNIPROT) + if (isObserved) { char resCharCode = ResidueProperties .getSingleCharacterCode(ResidueProperties .getCanonicalAminoAcid(residue.getDbResName())); resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); + + int[] mappingcols = new int[] { Integer.valueOf(resNum), + UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED }; + + mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols); } - // TODO: mapping should have SeqCoordSysNum => int[] {PDBeNum, - // PDBRESNUM, ATOMNUM } - mapping.put(currSeqIndex - nonObservedShiftIndex, - new int[] - { Integer.valueOf(resNum), UNASSIGNED, - isObserved ? firstPDBResNum - : UNASSIGNED }); } } } } /** - * Get the leading integer part of a string that begins with an integer. - * - * @param input - * - the string input to process - * @param failValue - * - value returned if unsuccessful - * @return - */ - static int getLeadingIntegerValue(String input, int failValue) - { - if (input == null) - { - return failValue; - } - String[] parts = input.split("(?=\\D)(?<=\\d)"); - if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+")) - { - return Integer.valueOf(parts[0]); - } - return failValue; - } - - /** * * @param chainId * Target chain to populate mapping of its atom positions. @@ -863,14 +861,15 @@ public class SiftsClient implements SiftsClientI { boolean isStrictMatch = true; return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession) - : curDBRefAccessionIdsString.contains(accession.toLowerCase()); + : curDBRefAccessionIdsString + .contains(accession.toLowerCase(Locale.ROOT)); } private boolean isFoundInSiftsEntry(String accessionId) { Set siftsDBRefs = getAllMappingAccession(); return accessionId != null - && siftsDBRefs.contains(accessionId.toLowerCase()); + && siftsDBRefs.contains(accessionId.toLowerCase(Locale.ROOT)); } /**