X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=6e7b9880284ebbf81dca2b24c8fe4e7645042512;hb=2c582d859f37cd06681f9f47a5e6eea47ac0b9ef;hp=c205e9ec1965da8237a330405d87ce92f284641a;hpb=b6a605ef4e4ef09b091b6db56d7aef24c7bf44ef;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index c205e9e..6e7b988 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -47,9 +47,14 @@ import java.io.InputStream; import java.io.PrintStream; import java.net.URL; import java.net.URLConnection; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -96,14 +101,6 @@ public class SiftsClient implements SiftsClientI private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; - public static final String DEFAULT_SIFTS_DOWNLOAD_DIR = System - .getProperty("user.home") - + File.separatorChar - + ".sifts_downloads" + File.separatorChar; - - public static final String SIFTS_DOWNLOAD_DIR = jalview.bin.Cache - .getDefault("sifts_download_dir", DEFAULT_SIFTS_DOWNLOAD_DIR); - private final static String NEWLINE = System.lineSeparator(); private String curSourceDBRef; @@ -188,7 +185,7 @@ public class SiftsClient implements SiftsClientI try (InputStream in = new FileInputStream(siftFile); GZIPInputStream gzis = new GZIPInputStream(in);) { - System.out.println("File : " + siftFile.getAbsolutePath()); + // System.out.println("File : " + siftFile.getAbsolutePath()); JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts"); XMLStreamReader streamReader = XMLInputFactory.newInstance() .createXMLStreamReader(gzis); @@ -227,14 +224,19 @@ public class SiftsClient implements SiftsClientI */ public static File getSiftsFile(String pdbId) throws SiftsException { - File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase() - + ".xml.gz"); + File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() + + pdbId.toLowerCase() + ".xml.gz"); if (siftsFile.exists()) { - // TODO it may be worth performing an age check to determine if a - // new SIFTs file should be re-downloaded as SIFTs entries are usually - // updated weekly + // The line below is required for unit testing... don't comment it out!!! System.out.println(">>> SIFTS File already downloaded for " + pdbId); + + if (isFileOlderThanThreshold(siftsFile, + SiftsSettings.getCacheThresholdInDays())) + { + // System.out.println("Downloaded file is out of date, hence re-downloading..."); + siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + } return siftsFile; } siftsFile = downloadSiftsFile(pdbId.toLowerCase()); @@ -242,6 +244,34 @@ public class SiftsClient implements SiftsClientI } /** + * This method enables checking if a cached file has exceeded a certain + * threshold(in days) + * + * @param file + * the cached file + * @param noOfDays + * the threshold in days + * @return + */ + public static boolean isFileOlderThanThreshold(File file, int noOfDays) + { + Path filePath = file.toPath(); + BasicFileAttributes attr; + int diffInDays = 0; + try + { + attr = Files.readAttributes(filePath, BasicFileAttributes.class); + diffInDays = (int) ((new Date().getTime() - attr.lastModifiedTime() + .toMillis()) / (1000 * 60 * 60 * 24)); + // System.out.println("Diff in days : " + diffInDays); + } catch (IOException e) + { + e.printStackTrace(); + } + return noOfDays <= diffInDays; + } + + /** * Download a SIFTs XML file for a given PDB Id from an FTP repository * * @param pdbId @@ -250,17 +280,23 @@ public class SiftsClient implements SiftsClientI */ public static File downloadSiftsFile(String pdbId) throws SiftsException { + if (pdbId.contains(".cif")) + { + pdbId = pdbId.replace(".cif", ""); + } String siftFile = pdbId + ".xml.gz"; String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; - String downloadedSiftsFile = SIFTS_DOWNLOAD_DIR + siftFile; - File siftsDownloadDir = new File(SIFTS_DOWNLOAD_DIR); + String downloadedSiftsFile = SiftsSettings.getSiftDownloadDirectory() + + siftFile; + File siftsDownloadDir = new File( + SiftsSettings.getSiftDownloadDirectory()); if (!siftsDownloadDir.exists()) { siftsDownloadDir.mkdirs(); } try { - System.out.println(">> Download ftp url : " + siftsFileFTPURL); + // System.out.println(">> Download ftp url : " + siftsFileFTPURL); URL url = new URL(siftsFileFTPURL); URLConnection conn = url.openConnection(); InputStream inputStream = conn.getInputStream(); @@ -274,7 +310,7 @@ public class SiftsClient implements SiftsClientI } outputStream.close(); inputStream.close(); - System.out.println(">>> File downloaded : " + downloadedSiftsFile); + // System.out.println(">>> File downloaded : " + downloadedSiftsFile); } catch (IOException ex) { throw new SiftsException(ex.getMessage()); @@ -291,8 +327,8 @@ public class SiftsClient implements SiftsClientI */ public static boolean deleteSiftsFileByPDBId(String pdbId) { - File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase() - + ".xml.gz"); + File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() + + pdbId.toLowerCase() + ".xml.gz"); if (siftsFile.exists()) { return siftsFile.delete(); @@ -300,7 +336,6 @@ public class SiftsClient implements SiftsClientI return true; } - /** * Get a valid SIFTs DBRef for the given sequence current SIFTs entry * @@ -324,14 +359,6 @@ public class SiftsClient implements SiftsClientI DBRefEntry[] dbRefs = seq.getDBRefs(); if (dbRefs == null || dbRefs.length < 1) { - // final SequenceI[] seqs = new SequenceI[] { seq }; - // new jalview.ws.DBRefFetcher(seqs, null, null, null, false) - // .fetchDBRefs(true); - // dbRefs = seq.getDBRefs(); - } - - if (dbRefs == null || dbRefs.length < 1) - { throw new SiftsException("Could not get source DB Ref"); } @@ -357,7 +384,6 @@ public class SiftsClient implements SiftsClientI throw new SiftsException("Could not get source DB Ref"); } - /** * Check that the DBRef Entry is properly populated and is available in this * SiftClient instance @@ -420,15 +446,13 @@ public class SiftsClient implements SiftsClientI String mappingOutput = mappingDetails.toString(); StructureMapping siftsMapping = new StructureMapping(seq, pdbFile, - pdbId, chain, mapping, - mappingOutput); + pdbId, chain, mapping, mappingOutput); return siftsMapping; } @Override - public HashMap getGreedyMapping(String entityId, SequenceI seq, - java.io.PrintStream os) - throws SiftsException + public HashMap getGreedyMapping(String entityId, + SequenceI seq, java.io.PrintStream os) throws SiftsException { ArrayList omitNonObserved = new ArrayList(); int nonObservedShiftIndex = 0; @@ -436,8 +460,7 @@ public class SiftsClient implements SiftsClientI Entity entity = null; entity = getEntityById(entityId); String originalSeq = AlignSeq.extractGaps( - jalview.util.Comparison.GapChars, - seq.getSequenceAsString()); + jalview.util.Comparison.GapChars, seq.getSequenceAsString()); HashMap mapping = new HashMap(); DBRefEntryI sourceDBRef = seq.getSourceDBRef(); if (sourceDBRef == null) @@ -489,7 +512,14 @@ public class SiftsClient implements SiftsClientI String resNumIndexString = cRefDb.getDbResNum() .equalsIgnoreCase("None") ? String.valueOf(UNASSIGNED) : cRefDb.getDbResNum(); - currSeqIndex = Integer.valueOf(resNumIndexString); + try + { + currSeqIndex = Integer.valueOf(resNumIndexString); + } catch (NumberFormatException nfe) + { + currSeqIndex = Integer.valueOf(resNumIndexString + .split("[a-zA-Z]")[0]); + } if (pdbRefDb != null) { break;// exit loop if pdb and uniprot are already found @@ -500,13 +530,14 @@ public class SiftsClient implements SiftsClientI { continue; } - if (currSeqIndex > seq.getStart() && currSeqIndex <= seq.getEnd()) + if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd()) { int resNum; try { resNum = (pdbRefDb == null) ? Integer.valueOf(residue - .getDbResNum()) : Integer.valueOf(pdbRefDb.getDbResNum()); + .getDbResNum()) : Integer.valueOf(pdbRefDb + .getDbResNum()); } catch (NumberFormatException nfe) { resNum = (pdbRefDb == null) ? Integer.valueOf(residue @@ -518,7 +549,8 @@ public class SiftsClient implements SiftsClientI || seqCoordSys == CoordinateSys.UNIPROT) { char resCharCode = ResidueProperties - .getSingleCharacterCode(residue.getDbResName()); + .getSingleCharacterCode(ResidueProperties + .getCanonicalAminoAcid(residue.getDbResName())); resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); } else @@ -538,7 +570,10 @@ public class SiftsClient implements SiftsClientI { e.printStackTrace(); } - padWithGaps(resNumMap, omitNonObserved); + if (seqCoordSys == CoordinateSys.UNIPROT) + { + padWithGaps(resNumMap, omitNonObserved); + } int seqStart = UNASSIGNED; int seqEnd = UNASSIGNED; int pdbStart = UNASSIGNED; @@ -546,6 +581,10 @@ public class SiftsClient implements SiftsClientI Integer[] keys = mapping.keySet().toArray(new Integer[0]); Arrays.sort(keys); + if (keys.length < 1) + { + throw new SiftsException(">>> Empty SIFTS mapping generated!!"); + } seqStart = keys[0]; seqEnd = keys[keys.length - 1]; @@ -557,12 +596,17 @@ public class SiftsClient implements SiftsClientI int orignalSeqStart = seq.getStart(); if (orignalSeqStart >= 1) { - int subSeqStart = seqStart - orignalSeqStart; + int subSeqStart = (seqStart >= orignalSeqStart) ? seqStart + - orignalSeqStart : 0; int subSeqEnd = seqEnd - (orignalSeqStart - 1); subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length() : subSeqEnd; matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd); } + else + { + matchedSeq = originalSeq.substring(1, originalSeq.length()); + } } StringBuilder targetStrucSeqs = new StringBuilder(); @@ -574,13 +618,13 @@ public class SiftsClient implements SiftsClientI if (os != null) { MappingOutputPojo mop = new MappingOutputPojo(); - mop.setSeqStart(seqStart); - mop.setSeqEnd(seqEnd); + mop.setSeqStart(pdbStart); + mop.setSeqEnd(pdbEnd); mop.setSeqName(seq.getName()); mop.setSeqResidue(matchedSeq); - mop.setStrStart(pdbStart); - mop.setStrEnd(pdbEnd); + mop.setStrStart(seqStart); + mop.setStrEnd(seqEnd); mop.setStrName(structId); mop.setStrResidue(targetStrucSeqs.toString()); @@ -591,6 +635,58 @@ public class SiftsClient implements SiftsClientI } /** + * + * @param chainId + * Target chain to populate mapping of its atom positions. + * @param mapping + * Two dimension array of residue index versus atom position + * @throws IllegalArgumentException + * Thrown if chainId or mapping is null + */ + void populateAtomPositions(String chainId, + HashMap mapping) throws IllegalArgumentException + { + PDBChain chain = pdb.findChain(chainId); + if (chain == null || mapping == null) + { + throw new IllegalArgumentException( + "Chain id or mapping must not be null."); + } + for (int[] map : mapping.values()) + { + if (map[PDB_RES_POS] != UNASSIGNED) + { + map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); + } + } + } + + /** + * + * @param residueIndex + * The residue index used for the search + * @param atoms + * A collection of Atom to search + * @return atom position for the given residue index + */ + int getAtomIndex(int residueIndex, Collection atoms) + { + if (atoms == null) + { + throw new IllegalArgumentException( + "atoms collection must not be null!"); + } + for (Atom atom : atoms) + { + if (atom.resNumber == residueIndex) + { + return atom.atomIndex; + } + } + return UNASSIGNED; + } + + /** * Checks if the residue instance is marked 'Not_observed' or not * * @param residue @@ -598,16 +694,18 @@ public class SiftsClient implements SiftsClientI */ private boolean isResidueObserved(Residue residue) { - String annotation = getResidueAnnotaiton(residue, + HashSet annotations = getResidueAnnotaitons(residue, ResidueDetailType.ANNOTATION); - if (annotation == null) + if (annotations == null || annotations.isEmpty()) { return true; } - if (!annotation.equalsIgnoreCase(NOT_FOUND) - && annotation.equalsIgnoreCase(NOT_OBSERVED)) + for (String annotation : annotations) { - return false; + if (annotation.equalsIgnoreCase(NOT_OBSERVED)) + { + return false; + } } return true; } @@ -619,18 +717,19 @@ public class SiftsClient implements SiftsClientI * @param type * @return */ - private String getResidueAnnotaiton(Residue residue, + private HashSet getResidueAnnotaitons(Residue residue, ResidueDetailType type) { + HashSet foundAnnotations = new HashSet(); List resDetails = residue.getResidueDetail(); for (ResidueDetail resDetail : resDetails) { if (resDetail.getProperty().equalsIgnoreCase(type.getCode())) { - return resDetail.getContent(); + foundAnnotations.add(resDetail.getContent()); } } - return NOT_FOUND; + return foundAnnotations; } @Override @@ -675,71 +774,117 @@ public class SiftsClient implements SiftsClientI } - /** - * - * @param chainId - * Target chain to populate mapping of its atom positions. - * @param mapping - * Two dimension array of residue index versus atom position - * @throws IllegalArgumentException - * Thrown if chainId or mapping is null - */ - void populateAtomPositions(String chainId, HashMap mapping) - throws IllegalArgumentException + + @Override + public Entity getEntityById(String id) throws SiftsException { - PDBChain chain = pdb.findChain(chainId); - if (chain == null || mapping == null) + // Sometimes SIFTS mappings are wrongly swapped between different chains of + // a PDB entry. This results to wrong mappings being generated. The boolean + // flag 'isGetEntityIdDirectly, determines whether an entity to process is + // determined by a greedy heuristic search or by just matching the Chain Id + // directly against the entity Id tag. Setting the default value to 'false' + // utilise the heuristic search which always produces correct mappings but + // less optimised processing, where as changing the value to 'true' + // optimises performance but might result to incorrect mapping in some cases + // where SIFTS mappings are wrongly swapped between different chains. + boolean isGetEntityIdDirectly = false; + if (isGetEntityIdDirectly) { - throw new IllegalArgumentException( - "Chain id or mapping must not be null."); - } - for (int[] map : mapping.values()) - { - if (map[PDB_RES_POS] != UNASSIGNED) + List entities = siftsEntry.getEntity(); + for (Entity entity : entities) { - map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); + if (!entity.getEntityId().equalsIgnoreCase(id)) + { + continue; + } + return entity; } } + Entity entity = getEntityByMostOptimalMatchedId(id); + if (entity != null) + { + return entity; + } + throw new SiftsException("Entity " + id + " not found"); } /** + * This method was added because EntityId is NOT always equal to ChainId. + * Hence, it provides the logic to greedily detect the "true" Entity for a + * given chainId where discrepancies exist. * - * @param residueIndex - * The residue index used for the search - * @param atoms - * A collection of Atom to search - * @return atom position for the given residue index + * @param chainId + * @return */ - int getAtomIndex(int residueIndex, Collection atoms) + public Entity getEntityByMostOptimalMatchedId(String chainId) { - if (atoms == null) + // System.out.println("---> advanced greedy entityId matching block entered.."); + List entities = siftsEntry.getEntity(); + SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()]; + int count = 0; + for (Entity entity : entities) { - throw new IllegalArgumentException( - "atoms collection must not be null!"); + sPojo[count] = new SiftsEntitySortPojo(); + sPojo[count].entityId = entity.getEntityId(); + + List segments = entity.getSegment(); + for (Segment segment : segments) + { + List residues = segment.getListResidue().getResidue(); + for (Residue residue : residues) + { + List cRefDbs = residue.getCrossRefDb(); + for (CrossRefDb cRefDb : cRefDbs) + { + if (!cRefDb.getDbSource().equalsIgnoreCase("PDB")) + { + continue; + } + ++sPojo[count].resCount; + if (cRefDb.getDbChainId().equalsIgnoreCase(chainId)) + { + ++sPojo[count].chainIdFreq; + } + } + } + } + sPojo[count].pid = 100 * (sPojo[count].chainIdFreq / sPojo[count].resCount); + ++count; } - for (Atom atom : atoms) + Arrays.sort(sPojo, Collections.reverseOrder()); + System.out.println("highest matched entity : " + sPojo[0].entityId); + System.out.println("highest matched pid : " + sPojo[0].pid); + + if (sPojo[0].entityId != null) { - if (atom.resNumber == residueIndex) + for (Entity entity : entities) { - return atom.atomIndex; + if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId)) + { + continue; + } + return entity; } } - return UNASSIGNED; + return null; } - @Override - public Entity getEntityById(String id) throws SiftsException + public class SiftsEntitySortPojo implements + Comparable { - List entities = siftsEntry.getEntity(); - for (Entity entity : entities) + public String entityId; + + public int chainIdFreq; + + public int pid; + + public int resCount; + + @Override + public int compareTo(SiftsEntitySortPojo o) { - if (!entity.getEntityId().equalsIgnoreCase(id)) - { - continue; - } - return entity; + return this.pid - o.pid; } - throw new SiftsException("Entity " + id + " not found"); } @Override @@ -769,9 +914,9 @@ public class SiftsClient implements SiftsClientI String strName = mp.getStrName(); int pdbStart = mp.getStrStart(); int pdbEnd = mp.getStrEnd(); - + String type = mp.getType(); - + int maxid = (seqName.length() >= strName.length()) ? seqName.length() : strName.length(); int len = 72 - maxid - 1; @@ -798,7 +943,7 @@ public class SiftsClient implements SiftsClientI output.append(" - "); output.append(String.valueOf(pdbEnd)); output.append(NEWLINE).append(NEWLINE); - + int matchedSeqCount = 0; for (int j = 0; j < nochunks; j++) { @@ -822,32 +967,33 @@ public class SiftsClient implements SiftsClientI { try { - if ((i + (j * len)) < seqRes.length()) - { - if (seqRes.charAt(i + (j * len)) == strRes.charAt(i + (j * len)) - && !jalview.util.Comparison.isGap(seqRes.charAt(i - + (j * len)))) + if ((i + (j * len)) < seqRes.length()) { + if (seqRes.charAt(i + (j * len)) == strRes + .charAt(i + (j * len)) + && !jalview.util.Comparison.isGap(seqRes.charAt(i + + (j * len)))) + { matchedSeqCount++; - output.append("|"); - } - else if (type.equals("pep")) - { - if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)), - strRes.charAt(i + (j * len))) > 0) + output.append("|"); + } + else if (type.equals("pep")) { - output.append("."); + if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)), + strRes.charAt(i + (j * len))) > 0) + { + output.append("."); + } + else + { + output.append(" "); + } } else { output.append(" "); } } - else - { - output.append(" "); - } - } } catch (IndexOutOfBoundsException e) { continue; @@ -867,17 +1013,17 @@ public class SiftsClient implements SiftsClientI output.append(NEWLINE).append(NEWLINE); } float pid = (float) matchedSeqCount / seqRes.length() * 100; - if (pid < 2) + if (pid < SiftsSettings.getFailSafePIDThreshold()) { - throw new SiftsException("Low PID detected for SIFTs mapping..."); + throw new SiftsException(">>> Low PID detected for SIFTs mapping..."); } - output.append("Length of alignment = " + seqRes.length()) - .append(NEWLINE); + output.append("Length of alignment = " + seqRes.length()).append( + NEWLINE); output.append(new Format("Percentage ID = %2.2f").form(pid)); output.append(NEWLINE); return output; } - + @Override public int getEntityCount() { @@ -913,4 +1059,5 @@ public class SiftsClient implements SiftsClientI { return siftsEntry.getDbVersion(); } + }