X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=892ebd89c5f525b460dbcbb29b1bd0e0350f0a25;hb=d043ce47fc710d3eb2629ba926a8a7417bd67d8c;hp=68af7c3f570f36ca3b9ce35b39eb9ee9c89b57f0;hpb=f4766a7bbcfae845fc95923b01fa14ff83d589ff;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 68af7c3..892ebd8 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -20,28 +20,6 @@ */ package jalview.ws.sifts; -import jalview.analysis.AlignSeq; -import jalview.analysis.scoremodels.ScoreMatrix; -import jalview.analysis.scoremodels.ScoreModels; -import jalview.api.DBRefEntryI; -import jalview.api.SiftsClientI; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; -import jalview.datamodel.SequenceI; -import jalview.io.StructureFile; -import jalview.schemes.ResidueProperties; -import jalview.structure.StructureMapping; -import jalview.util.Comparison; -import jalview.util.DBRefUtils; -import jalview.util.Format; -import jalview.xml.binding.sifts.Entry; -import jalview.xml.binding.sifts.Entry.Entity; -import jalview.xml.binding.sifts.Entry.Entity.Segment; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; - import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -61,18 +39,43 @@ import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.TreeMap; import java.util.zip.GZIPInputStream; import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBElement; import javax.xml.bind.Unmarshaller; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamReader; -import MCview.Atom; -import MCview.PDBChain; +import jalview.analysis.AlignSeq; +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.api.DBRefEntryI; +import jalview.api.SiftsClientI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceI; +import jalview.io.BackupFiles; +import jalview.io.StructureFile; +import jalview.schemes.ResidueProperties; +import jalview.structure.StructureMapping; +import jalview.util.Comparison; +import jalview.util.DBRefUtils; +import jalview.util.Format; +import jalview.util.Platform; +import jalview.xml.binding.sifts.Entry; +import jalview.xml.binding.sifts.Entry.Entity; +import jalview.xml.binding.sifts.Entry.Entity.Segment; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; +import mc_view.Atom; +import mc_view.PDBChain; public class SiftsClient implements SiftsClientI { @@ -92,23 +95,36 @@ public class SiftsClient implements SiftsClientI private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT; + /** + * PDB sequence position to sequence coordinate mapping as derived from SIFTS + * record for the identified SeqCoordSys Used for lift-over from sequence + * derived from PDB (with first extracted PDBRESNUM as 'start' to the sequence + * being annotated with PDB data + */ + private jalview.datamodel.Mapping seqFromPdbMapping; + private static final int BUFFER_SIZE = 4096; - public static final int UNASSIGNED = -1; + public static final int UNASSIGNED = Integer.MIN_VALUE; private static final int PDB_RES_POS = 0; private static final int PDB_ATOM_POS = 1; + private static final int PDBE_POS = 2; + private static final String NOT_OBSERVED = "Not_Observed"; private static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; private final static String NEWLINE = System.lineSeparator(); + private static final boolean GET_STREAM = false; + private static final boolean CACHE_FILE = true; private String curSourceDBRef; private HashSet curDBRefAccessionIdsString; + private boolean doCache = false; private enum CoordinateSys { @@ -154,8 +170,31 @@ public class SiftsClient implements SiftsClientI { this.pdb = pdb; this.pdbId = pdb.getId(); - File siftsFile = getSiftsFile(pdbId); - siftsEntry = parseSIFTs(siftsFile); + if (doCache) { + File siftsFile = getSiftsFile(pdbId); + siftsEntry = parseSIFTs(siftsFile); + } else { + siftsEntry = parseSIFTSStreamFor(pdbId); + } + } + + /** + * A more streamlined version of SIFT reading that allows for streaming of the data. + * + * @param pdbId + * @return + * @throws SiftsException + */ + private static Entry parseSIFTSStreamFor(String pdbId) throws SiftsException + { + try + { + InputStream is = (InputStream) downloadSifts(pdbId, GET_STREAM); + return parseSIFTs(is); + } catch (Exception e) + { + throw new SiftsException(e.getMessage()); + } } /** @@ -169,19 +208,25 @@ public class SiftsClient implements SiftsClientI */ private Entry parseSIFTs(File siftFile) throws SiftsException { - try (InputStream in = new FileInputStream(siftFile); - GZIPInputStream gzis = new GZIPInputStream(in);) + try (InputStream in = new FileInputStream(siftFile)) { + return parseSIFTs(in); + } catch (Exception e) + { + e.printStackTrace(); + throw new SiftsException(e.getMessage()); + } + } + + private static Entry parseSIFTs(InputStream in) throws Exception { + try (GZIPInputStream gzis = new GZIPInputStream(in);) { // System.out.println("File : " + siftFile.getAbsolutePath()); JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts"); XMLStreamReader streamReader = XMLInputFactory.newInstance() .createXMLStreamReader(gzis); Unmarshaller um = jc.createUnmarshaller(); - return (Entry) um.unmarshal(streamReader); - } catch (Exception e) - { - e.printStackTrace(); - throw new SiftsException(e.getMessage()); + JAXBElement jbe = um.unmarshal(streamReader, Entry.class); + return jbe.getValue(); } } @@ -204,27 +249,27 @@ public class SiftsClient implements SiftsClientI } String siftsFileName = SiftsSettings.getSiftDownloadDirectory() - + pdbId.toLowerCase() + ".xml.gz"; + + pdbId.toLowerCase(Locale.ROOT) + ".xml.gz"; File siftsFile = new File(siftsFileName); if (siftsFile.exists()) { // The line below is required for unit testing... don't comment it out!!! System.out.println(">>> SIFTS File already downloaded for " + pdbId); - if (isFileOlderThanThreshold(siftsFile, + if (Platform.isFileOlderThanThreshold(siftsFile, SiftsSettings.getCacheThresholdInDays())) { File oldSiftsFile = new File(siftsFileName + "_old"); - siftsFile.renameTo(oldSiftsFile); + BackupFiles.moveFileToFile(siftsFile, oldSiftsFile); try { - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + siftsFile = downloadSiftsFile(pdbId.toLowerCase(Locale.ROOT)); oldSiftsFile.delete(); return siftsFile; } catch (IOException e) { e.printStackTrace(); - oldSiftsFile.renameTo(siftsFile); + BackupFiles.moveFileToFile(oldSiftsFile, siftsFile); return new File(siftsFileName); } } @@ -235,7 +280,7 @@ public class SiftsClient implements SiftsClientI } try { - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + siftsFile = downloadSiftsFile(pdbId.toLowerCase(Locale.ROOT)); } catch (IOException e) { throw new SiftsException(e.getMessage()); @@ -244,35 +289,6 @@ public class SiftsClient implements SiftsClientI } /** - * This method enables checking if a cached file has exceeded a certain - * threshold(in days) - * - * @param file - * the cached file - * @param noOfDays - * the threshold in days - * @return - */ - public static boolean isFileOlderThanThreshold(File file, int noOfDays) - { - Path filePath = file.toPath(); - BasicFileAttributes attr; - int diffInDays = 0; - try - { - attr = Files.readAttributes(filePath, BasicFileAttributes.class); - diffInDays = (int) ((new Date().getTime() - - attr.lastModifiedTime().toMillis()) - / (1000 * 60 * 60 * 24)); - // System.out.println("Diff in days : " + diffInDays); - } catch (IOException e) - { - e.printStackTrace(); - } - return noOfDays <= diffInDays; - } - - /** * Download a SIFTs XML file for a given PDB Id from an FTP repository * * @param pdbId @@ -282,39 +298,48 @@ public class SiftsClient implements SiftsClientI */ public static File downloadSiftsFile(String pdbId) throws SiftsException, IOException + { + return (File) downloadSifts(pdbId, CACHE_FILE); + } + + /** + * Download SIFTs XML with the option to cache a file or to get a stream. + * + * @param pdbId + * @param asFile + * @return + * @throws IOException + */ + private static Object downloadSifts(String pdbId, boolean asFile) throws IOException { + pdbId = pdbId.toLowerCase(Locale.ROOT); if (pdbId.contains(".cif")) { pdbId = pdbId.replace(".cif", ""); } String siftFile = pdbId + ".xml.gz"; - String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; - String downloadedSiftsFile = SiftsSettings.getSiftDownloadDirectory() - + siftFile; - File siftsDownloadDir = new File( - SiftsSettings.getSiftDownloadDirectory()); - if (!siftsDownloadDir.exists()) + File downloadTo = null; + if (asFile) { - siftsDownloadDir.mkdirs(); + downloadTo = new File( + SiftsSettings.getSiftDownloadDirectory() + siftFile); + File siftsDownloadDir = new File(SiftsSettings.getSiftDownloadDirectory()); + if (!siftsDownloadDir.exists()) + { + siftsDownloadDir.mkdirs(); + } } - // System.out.println(">> Download ftp url : " + siftsFileFTPURL); - // long now = System.currentTimeMillis(); + + String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; URL url = new URL(siftsFileFTPURL); URLConnection conn = url.openConnection(); - InputStream inputStream = conn.getInputStream(); - FileOutputStream outputStream = new FileOutputStream( - downloadedSiftsFile); - byte[] buffer = new byte[BUFFER_SIZE]; - int bytesRead = -1; - while ((bytesRead = inputStream.read(buffer)) != -1) - { - outputStream.write(buffer, 0, bytesRead); - } - outputStream.close(); - inputStream.close(); - // System.out.println(">>> File downloaded : " + downloadedSiftsFile - // + " took " + (System.currentTimeMillis() - now) + "ms"); - return new File(downloadedSiftsFile); + InputStream is = conn.getInputStream(); + if (!asFile) + return is; + // This is MUCH more efficent in JavaScript, as we already have the bytes + Platform.streamToFile(is, downloadTo); + is.close(); + return downloadTo; } /** @@ -327,7 +352,7 @@ public class SiftsClient implements SiftsClientI public static boolean deleteSiftsFileByPDBId(String pdbId) { File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() - + pdbId.toLowerCase() + ".xml.gz"); + + pdbId.toLowerCase(Locale.ROOT) + ".xml.gz"); if (siftsFile.exists()) { return siftsFile.delete(); @@ -401,8 +426,8 @@ public class SiftsClient implements SiftsClientI .getMapRegion(); for (MapRegion mapRegion : mapRegions) { - accessions - .add(mapRegion.getDb().getDbAccessionId().toLowerCase()); + accessions.add(mapRegion.getDb().getDbAccessionId() + .toLowerCase(Locale.ROOT)); } } } @@ -413,6 +438,11 @@ public class SiftsClient implements SiftsClientI public StructureMapping getSiftsStructureMapping(SequenceI seq, String pdbFile, String chain) throws SiftsException { + SequenceI aseq = seq; + while (seq.getDatasetSequence() != null) + { + seq = seq.getDatasetSequence(); + } structId = (chain == null) ? pdbId : pdbId + "|" + chain; System.out.println("Getting SIFTS mapping for " + structId + ": seq " + seq.getName()); @@ -435,8 +465,9 @@ public class SiftsClient implements SiftsClientI HashMap mapping = getGreedyMapping(chain, seq, ps); String mappingOutput = mappingDetails.toString(); - StructureMapping siftsMapping = new StructureMapping(seq, pdbFile, - pdbId, chain, mapping, mappingOutput); + StructureMapping siftsMapping = new StructureMapping(aseq, pdbFile, + pdbId, chain, mapping, mappingOutput, seqFromPdbMapping); + return siftsMapping; } @@ -444,8 +475,8 @@ public class SiftsClient implements SiftsClientI public HashMap getGreedyMapping(String entityId, SequenceI seq, java.io.PrintStream os) throws SiftsException { - List omitNonObserved = new ArrayList(); - int nonObservedShiftIndex = 0; + List omitNonObserved = new ArrayList<>(); + int nonObservedShiftIndex = 0,pdbeNonObserved=0; // System.out.println("Generating mappings for : " + entityId); Entity entity = null; entity = getEntityById(entityId); @@ -466,9 +497,11 @@ public class SiftsClient implements SiftsClientI HashSet dbRefAccessionIdsString = new HashSet(); for (DBRefEntry dbref : seq.getDBRefs()) { - dbRefAccessionIdsString.add(dbref.getAccessionId().toLowerCase()); + dbRefAccessionIdsString + .add(dbref.getAccessionId().toLowerCase(Locale.ROOT)); } - dbRefAccessionIdsString.add(sourceDBRef.getAccessionId().toLowerCase()); + dbRefAccessionIdsString + .add(sourceDBRef.getAccessionId().toLowerCase(Locale.ROOT)); curDBRefAccessionIdsString = dbRefAccessionIdsString; curSourceDBRef = sourceDBRef.getAccessionId(); @@ -476,7 +509,7 @@ public class SiftsClient implements SiftsClientI TreeMap resNumMap = new TreeMap(); List segments = entity.getSegment(); SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap, - omitNonObserved, nonObservedShiftIndex); + omitNonObserved, nonObservedShiftIndex,pdbeNonObserved); processSegments(segments, shp); try { @@ -498,15 +531,61 @@ public class SiftsClient implements SiftsClientI { throw new SiftsException("SIFTS mapping failed"); } + // also construct a mapping object between the seq-coord sys and the PDB seq's coord sys Integer[] keys = mapping.keySet().toArray(new Integer[0]); Arrays.sort(keys); seqStart = keys[0]; seqEnd = keys[keys.length - 1]; - + List from=new ArrayList<>(),to=new ArrayList<>(); + int[]_cfrom=null,_cto=null; String matchedSeq = originalSeq; - if (seqStart != UNASSIGNED) + if (seqStart != UNASSIGNED) // fixme! seqStart can map to -1 for a pdb sequence that starts <-1 { + for (int seqps:keys) + { + int pdbpos = mapping.get(seqps)[PDBE_POS]; + if (pdbpos == UNASSIGNED) + { + // not correct - pdbpos might be -1, but leave it for now + continue; + } + if (_cfrom==null || seqps!=_cfrom[1]+1) + { + _cfrom = new int[] { seqps,seqps}; + from.add(_cfrom); + _cto = null; // discontinuity + } else { + _cfrom[1]= seqps; + } + if (_cto==null || pdbpos!=1+_cto[1]) + { + _cto = new int[] { pdbpos,pdbpos}; + to.add(_cto); + } else { + _cto[1] = pdbpos; + } + } + _cfrom = new int[from.size() * 2]; + _cto = new int[to.size() * 2]; + int p = 0; + for (int[] range : from) + { + _cfrom[p++] = range[0]; + _cfrom[p++] = range[1]; + } + ; + p = 0; + for (int[] range : to) + { + _cto[p++] = range[0]; + _cto[p++] = range[1]; + } + ; + + seqFromPdbMapping = new jalview.datamodel.Mapping(null, _cto, _cfrom, + 1, + 1); pdbStart = mapping.get(seqStart)[PDB_RES_POS]; pdbEnd = mapping.get(seqEnd)[PDB_RES_POS]; int orignalSeqStart = seq.getStart(); @@ -559,6 +638,8 @@ public class SiftsClient implements SiftsClientI TreeMap resNumMap = shp.getResNumMap(); List omitNonObserved = shp.getOmitNonObserved(); int nonObservedShiftIndex = shp.getNonObservedShiftIndex(); + int pdbeNonObservedCount = shp.getPdbeNonObserved(); + int firstPDBResNum = UNASSIGNED; for (Segment segment : segments) { // System.out.println("Mapping segments : " + segment.getSegId() + "\\"s @@ -566,6 +647,9 @@ public class SiftsClient implements SiftsClientI List residues = segment.getListResidue().getResidue(); for (Residue residue : residues) { + boolean isObserved = isResidueObserved(residue); + int pdbeIndex = Platform.getLeadingIntegerValue(residue.getDbResNum(), + UNASSIGNED); int currSeqIndex = UNASSIGNED; List cRefDbs = residue.getCrossRefDb(); CrossRefDb pdbRefDb = null; @@ -574,11 +658,24 @@ public class SiftsClient implements SiftsClientI if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB)) { pdbRefDb = cRefDb; + if (firstPDBResNum == UNASSIGNED) + { + firstPDBResNum = Platform.getLeadingIntegerValue(cRefDb.getDbResNum(), + UNASSIGNED); + } + else + { + if (isObserved) + { + // after we find the first observed residue we just increment + firstPDBResNum++; + } + } } if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName()) && isAccessionMatched(cRefDb.getDbAccessionId())) { - currSeqIndex = getLeadingIntegerValue(cRefDb.getDbResNum(), + currSeqIndex = Platform.getLeadingIntegerValue(cRefDb.getDbResNum(), UNASSIGNED); if (pdbRefDb != null) { @@ -586,64 +683,71 @@ public class SiftsClient implements SiftsClientI } } } + if (!isObserved) + { + ++pdbeNonObservedCount; + } + if (seqCoordSys == seqCoordSys.PDB) // FIXME: is seqCoordSys ever PDBe + // ??? + { + // if the sequence has a primary reference to the PDB, then we are + // dealing with a sequence extracted directly from the PDB. In that + // case, numbering is PDBe - non-observed residues + currSeqIndex = seq.getStart() - 1 + pdbeIndex; + } + if (!isObserved) + { + if (seqCoordSys != CoordinateSys.UNIPROT) // FIXME: PDB or PDBe only + // here + { + // mapping to PDB or PDBe so we need to bookkeep for the + // non-observed + // SEQRES positions + omitNonObserved.add(currSeqIndex); + ++nonObservedShiftIndex; + } + } if (currSeqIndex == UNASSIGNED) { + // change in logic - unobserved residues with no currSeqIndex + // corresponding are still counted in both nonObservedShiftIndex and + // pdbeIndex... continue; } - if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd()) + // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) // + // true + // numbering + // is + // not + // up + // to + // seq.getEnd() { int resNum = (pdbRefDb == null) - ? getLeadingIntegerValue(residue.getDbResNum(), + ? Platform.getLeadingIntegerValue(residue.getDbResNum(), UNASSIGNED) - : getLeadingIntegerValue(pdbRefDb.getDbResNum(), + : Platform.getLeadingIntegerValue(pdbRefDb.getDbResNum(), UNASSIGNED); - if (isResidueObserved(residue) - || seqCoordSys == CoordinateSys.UNIPROT) + if (isObserved) { char resCharCode = ResidueProperties .getSingleCharacterCode(ResidueProperties .getCanonicalAminoAcid(residue.getDbResName())); resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); + + int[] mappingcols = new int[] { Integer.valueOf(resNum), + UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED }; + + mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols); } - else - { - omitNonObserved.add(currSeqIndex); - ++nonObservedShiftIndex; - } - mapping.put(currSeqIndex - nonObservedShiftIndex, - new int[] - { Integer.valueOf(resNum), UNASSIGNED }); } } } } /** - * Get the leading integer part of a string that begins with an integer. - * - * @param input - * - the string input to process - * @param failValue - * - value returned if unsuccessful - * @return - */ - static int getLeadingIntegerValue(String input, int failValue) - { - if (input == null) - { - return failValue; - } - String[] parts = input.split("(?=\\D)(?<=\\d)"); - if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+")) - { - return Integer.valueOf(parts[0]); - } - return failValue; - } - - /** * * @param chainId * Target chain to populate mapping of its atom positions. @@ -757,14 +861,15 @@ public class SiftsClient implements SiftsClientI { boolean isStrictMatch = true; return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession) - : curDBRefAccessionIdsString.contains(accession.toLowerCase()); + : curDBRefAccessionIdsString + .contains(accession.toLowerCase(Locale.ROOT)); } private boolean isFoundInSiftsEntry(String accessionId) { Set siftsDBRefs = getAllMappingAccession(); return accessionId != null - && siftsDBRefs.contains(accessionId.toLowerCase()); + && siftsDBRefs.contains(accessionId.toLowerCase(Locale.ROOT)); } /** @@ -904,17 +1009,36 @@ public class SiftsClient implements SiftsClientI private int nonObservedShiftIndex; + /** + * count of number of 'not observed' positions in the PDB record's SEQRES + * (total number of residues with coordinates == length(SEQRES) - + * pdbeNonObserved + */ + private int pdbeNonObserved; + public SegmentHelperPojo(SequenceI seq, HashMap mapping, TreeMap resNumMap, - List omitNonObserved, int nonObservedShiftIndex) + List omitNonObserved, int nonObservedShiftIndex, + int pdbeNonObserved) { setSeq(seq); setMapping(mapping); setResNumMap(resNumMap); setOmitNonObserved(omitNonObserved); setNonObservedShiftIndex(nonObservedShiftIndex); + setPdbeNonObserved(pdbeNonObserved); + + } + + public void setPdbeNonObserved(int pdbeNonObserved2) + { + this.pdbeNonObserved = pdbeNonObserved2; } + public int getPdbeNonObserved() + { + return pdbeNonObserved; + } public SequenceI getSeq() { return seq; @@ -964,6 +1088,7 @@ public class SiftsClient implements SiftsClientI { this.nonObservedShiftIndex = nonObservedShiftIndex; } + } @Override