X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=6c11dd29af024984b2a5fb809b73758aa52c5887;hb=f7c9911a43d7b85a3f0097eb99825b73f3a95713;hp=9f7bc525335d335aaa1ca6923c20ee7997ab5e01;hpb=6dacd9d2805d0899a59329fdc4e43dfacdf93044;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 9f7bc52..6c11dd2 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -26,6 +26,7 @@ import jalview.api.SiftsClientI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.SequenceI; +import jalview.io.StructureFile; import jalview.schemes.ResidueProperties; import jalview.structure.StructureMapping; import jalview.util.Format; @@ -77,7 +78,7 @@ public class SiftsClient implements SiftsClientI { private Entry siftsEntry; - private PDBfile pdb; + private StructureFile pdb; private String pdbId; @@ -95,16 +96,12 @@ public class SiftsClient implements SiftsClientI private static final int PDB_ATOM_POS = 1; - private static final String NOT_FOUND = "Not_Found"; - private static final String NOT_OBSERVED = "Not_Observed"; - private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; + private static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; private final static String NEWLINE = System.lineSeparator(); - private final static int THRESHOLD_IN_DAYS = 2; - private String curSourceDBRef; private HashSet curDBRefAccessionIdsString; @@ -149,10 +146,10 @@ public class SiftsClient implements SiftsClientI * @param pdbId * @throws SiftsException */ - public SiftsClient(PDBfile pdb) throws SiftsException + public SiftsClient(StructureFile pdb) throws SiftsException { this.pdb = pdb; - this.pdbId = pdb.id; + this.pdbId = pdb.getId(); File siftsFile = getSiftsFile(pdbId); siftsEntry = parseSIFTs(siftsFile); } @@ -169,7 +166,7 @@ public class SiftsClient implements SiftsClientI public SiftsClient(PDBfile pdb, File siftsFile) throws SiftsException { this.pdb = pdb; - this.pdbId = pdb.id; + this.pdbId = pdb.getId(); siftsEntry = parseSIFTs(siftsFile); } @@ -226,21 +223,39 @@ public class SiftsClient implements SiftsClientI */ public static File getSiftsFile(String pdbId) throws SiftsException { - File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() - + pdbId.toLowerCase() + ".xml.gz"); + String siftsFileName = SiftsSettings.getSiftDownloadDirectory() + + pdbId.toLowerCase() + ".xml.gz"; + File siftsFile = new File(siftsFileName); if (siftsFile.exists()) { // The line below is required for unit testing... don't comment it out!!! System.out.println(">>> SIFTS File already downloaded for " + pdbId); - if (isFileOlderThanThreshold(siftsFile, THRESHOLD_IN_DAYS)) + if (isFileOlderThanThreshold(siftsFile, + SiftsSettings.getCacheThresholdInDays())) { - // System.out.println("Downloaded file is out of date, hence re-downloading..."); - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + File oldSiftsFile = new File(siftsFileName + "_old"); + siftsFile.renameTo(oldSiftsFile); + try + { + siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + oldSiftsFile.delete(); + return siftsFile; + } catch (IOException e) + { + e.printStackTrace(); + oldSiftsFile.renameTo(siftsFile); + return new File(siftsFileName); + } } - return siftsFile; } - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + try + { + siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + } catch (IOException e) + { + throw new SiftsException(e.getMessage()); + } return siftsFile; } @@ -278,9 +293,15 @@ public class SiftsClient implements SiftsClientI * @param pdbId * @return downloaded SIFTs XML file * @throws SiftsException + * @throws IOException */ - public static File downloadSiftsFile(String pdbId) throws SiftsException + public static File downloadSiftsFile(String pdbId) throws SiftsException, + IOException { + if (pdbId.contains(".cif")) + { + pdbId = pdbId.replace(".cif", ""); + } String siftFile = pdbId + ".xml.gz"; String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; String downloadedSiftsFile = SiftsSettings.getSiftDownloadDirectory() @@ -291,8 +312,6 @@ public class SiftsClient implements SiftsClientI { siftsDownloadDir.mkdirs(); } - try - { // System.out.println(">> Download ftp url : " + siftsFileFTPURL); URL url = new URL(siftsFileFTPURL); URLConnection conn = url.openConnection(); @@ -308,10 +327,6 @@ public class SiftsClient implements SiftsClientI outputStream.close(); inputStream.close(); // System.out.println(">>> File downloaded : " + downloadedSiftsFile); - } catch (IOException ex) - { - throw new SiftsException(ex.getMessage()); - } return new File(downloadedSiftsFile); } @@ -356,7 +371,8 @@ public class SiftsClient implements SiftsClientI DBRefEntry[] dbRefs = seq.getDBRefs(); if (dbRefs == null || dbRefs.length < 1) { - throw new SiftsException("Could not get source DB Ref"); + throw new SiftsException( + "Source DBRef could not be determined. DBRefs might not have been retrieved."); } for (DBRefEntryI dbRef : dbRefs) @@ -370,6 +386,7 @@ public class SiftsClient implements SiftsClientI && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef .getSource().equalsIgnoreCase(DBRefSource.PDB))) { + seq.setSourceDBRef(dbRef); return dbRef; } } @@ -409,7 +426,8 @@ public class SiftsClient implements SiftsClientI .getMapRegion(); for (MapRegion mapRegion : mapRegions) { - accessions.add(mapRegion.getDb().getDbAccessionId()); + accessions + .add(mapRegion.getDb().getDbAccessionId().toLowerCase()); } } } @@ -453,19 +471,16 @@ public class SiftsClient implements SiftsClientI { ArrayList omitNonObserved = new ArrayList(); int nonObservedShiftIndex = 0; - System.out.println("Generating mappings for : " + entityId); + // System.out.println("Generating mappings for : " + entityId); Entity entity = null; entity = getEntityById(entityId); String originalSeq = AlignSeq.extractGaps( jalview.util.Comparison.GapChars, seq.getSequenceAsString()); HashMap mapping = new HashMap(); DBRefEntryI sourceDBRef = seq.getSourceDBRef(); - if (sourceDBRef == null) - { - sourceDBRef = getValidSourceDBRef(seq); - // TODO ensure sequence start/end is in the same coordinate system and - // consistent with the choosen sourceDBRef - } + sourceDBRef = getValidSourceDBRef(seq); + // TODO ensure sequence start/end is in the same coordinate system and + // consistent with the choosen sourceDBRef // set sequence coordinate system - default value is UniProt if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB)) @@ -488,8 +503,8 @@ public class SiftsClient implements SiftsClientI for (Segment segment : segments) { segStartEnd = segment.getStart() + " - " + segment.getEnd(); - System.out.println("Mappging segments : " + segment.getSegId() + "\\" - + segStartEnd); + // System.out.println("Mapping segments : " + segment.getSegId() + "\\" + // + segStartEnd); List residues = segment.getListResidue().getResidue(); for (Residue residue : residues) { @@ -516,6 +531,7 @@ public class SiftsClient implements SiftsClientI { currSeqIndex = Integer.valueOf(resNumIndexString .split("[a-zA-Z]")[0]); + continue; } if (pdbRefDb != null) { @@ -527,7 +543,7 @@ public class SiftsClient implements SiftsClientI { continue; } - if (currSeqIndex > seq.getStart() && currSeqIndex <= seq.getEnd()) + if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd()) { int resNum; try @@ -540,6 +556,7 @@ public class SiftsClient implements SiftsClientI resNum = (pdbRefDb == null) ? Integer.valueOf(residue .getDbResNum()) : Integer.valueOf(pdbRefDb .getDbResNum().split("[a-zA-Z]")[0]); + continue; } if (isResidueObserved(residue) @@ -578,6 +595,10 @@ public class SiftsClient implements SiftsClientI Integer[] keys = mapping.keySet().toArray(new Integer[0]); Arrays.sort(keys); + if (keys.length < 1) + { + throw new SiftsException(">>> Empty SIFTS mapping generated!!"); + } seqStart = keys[0]; seqEnd = keys[keys.length - 1]; @@ -589,12 +610,17 @@ public class SiftsClient implements SiftsClientI int orignalSeqStart = seq.getStart(); if (orignalSeqStart >= 1) { - int subSeqStart = seqStart - orignalSeqStart; + int subSeqStart = (seqStart >= orignalSeqStart) ? seqStart + - orignalSeqStart : 0; int subSeqEnd = seqEnd - (orignalSeqStart - 1); subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length() : subSeqEnd; matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd); } + else + { + matchedSeq = originalSeq.substring(1, originalSeq.length()); + } } StringBuilder targetStrucSeqs = new StringBuilder(); @@ -606,13 +632,13 @@ public class SiftsClient implements SiftsClientI if (os != null) { MappingOutputPojo mop = new MappingOutputPojo(); - mop.setSeqStart(seqStart); - mop.setSeqEnd(seqEnd); + mop.setSeqStart(pdbStart); + mop.setSeqEnd(pdbEnd); mop.setSeqName(seq.getName()); mop.setSeqResidue(matchedSeq); - mop.setStrStart(pdbStart); - mop.setStrEnd(pdbEnd); + mop.setStrStart(seqStart); + mop.setStrEnd(seqEnd); mop.setStrName(structId); mop.setStrResidue(targetStrucSeqs.toString()); @@ -623,6 +649,65 @@ public class SiftsClient implements SiftsClientI } /** + * + * @param chainId + * Target chain to populate mapping of its atom positions. + * @param mapping + * Two dimension array of residue index versus atom position + * @throws IllegalArgumentException + * Thrown if chainId or mapping is null + */ + void populateAtomPositions(String chainId, + HashMap mapping) throws IllegalArgumentException + { + try + { + PDBChain chain = pdb.findChain(chainId); + + if (chain == null || mapping == null) + { + throw new IllegalArgumentException( + "Chain id or mapping must not be null."); + } + for (int[] map : mapping.values()) + { + if (map[PDB_RES_POS] != UNASSIGNED) + { + map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); + } + } + } catch (Exception e) + { + e.printStackTrace(); + } + } + + /** + * + * @param residueIndex + * The residue index used for the search + * @param atoms + * A collection of Atom to search + * @return atom position for the given residue index + */ + int getAtomIndex(int residueIndex, Collection atoms) + { + if (atoms == null) + { + throw new IllegalArgumentException( + "atoms collection must not be null!"); + } + for (Atom atom : atoms) + { + if (atom.resNumber == residueIndex) + { + return atom.atomIndex; + } + } + return UNASSIGNED; + } + + /** * Checks if the residue instance is marked 'Not_observed' or not * * @param residue @@ -630,16 +715,18 @@ public class SiftsClient implements SiftsClientI */ private boolean isResidueObserved(Residue residue) { - String annotation = getResidueAnnotaiton(residue, + HashSet annotations = getResidueAnnotaitons(residue, ResidueDetailType.ANNOTATION); - if (annotation == null) + if (annotations == null || annotations.isEmpty()) { return true; } - if (!annotation.equalsIgnoreCase(NOT_FOUND) - && annotation.equalsIgnoreCase(NOT_OBSERVED)) + for (String annotation : annotations) { - return false; + if (annotation.equalsIgnoreCase(NOT_OBSERVED)) + { + return false; + } } return true; } @@ -651,18 +738,19 @@ public class SiftsClient implements SiftsClientI * @param type * @return */ - private String getResidueAnnotaiton(Residue residue, + private HashSet getResidueAnnotaitons(Residue residue, ResidueDetailType type) { + HashSet foundAnnotations = new HashSet(); List resDetails = residue.getResidueDetail(); for (ResidueDetail resDetail : resDetails) { if (resDetail.getProperty().equalsIgnoreCase(type.getCode())) { - return resDetail.getContent(); + foundAnnotations.add(resDetail.getContent()); } } - return NOT_FOUND; + return foundAnnotations; } @Override @@ -675,8 +763,9 @@ public class SiftsClient implements SiftsClientI private boolean isFoundInSiftsEntry(String accessionId) { + HashSet siftsDBRefs = getAllMappingAccession(); return accessionId != null - && getAllMappingAccession().contains(accessionId); + && siftsDBRefs.contains(accessionId.toLowerCase()); } /** @@ -695,8 +784,8 @@ public class SiftsClient implements SiftsClientI Arrays.sort(keys); int firstIndex = keys[0]; int lastIndex = keys[keys.length - 1]; - System.out.println("Min value " + firstIndex); - System.out.println("Max value " + lastIndex); + // System.out.println("Min value " + firstIndex); + // System.out.println("Max value " + lastIndex); for (int x = firstIndex; x <= lastIndex; x++) { if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x)) @@ -706,69 +795,32 @@ public class SiftsClient implements SiftsClientI } } - /** - * - * @param chainId - * Target chain to populate mapping of its atom positions. - * @param mapping - * Two dimension array of residue index versus atom position - * @throws IllegalArgumentException - * Thrown if chainId or mapping is null - */ - void populateAtomPositions(String chainId, HashMap mapping) - throws IllegalArgumentException - { - PDBChain chain = pdb.findChain(chainId); - if (chain == null || mapping == null) - { - throw new IllegalArgumentException( - "Chain id or mapping must not be null."); - } - for (int[] map : mapping.values()) - { - if (map[PDB_RES_POS] != UNASSIGNED) - { - map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); - } - } - } - /** - * - * @param residueIndex - * The residue index used for the search - * @param atoms - * A collection of Atom to search - * @return atom position for the given residue index - */ - int getAtomIndex(int residueIndex, Collection atoms) - { - if (atoms == null) - { - throw new IllegalArgumentException( - "atoms collection must not be null!"); - } - for (Atom atom : atoms) - { - if (atom.resNumber == residueIndex) - { - return atom.atomIndex; - } - } - return UNASSIGNED; - } @Override public Entity getEntityById(String id) throws SiftsException { - List entities = siftsEntry.getEntity(); - for (Entity entity : entities) + // Sometimes SIFTS mappings are wrongly swapped between different chains of + // a PDB entry. This results to wrong mappings being generated. The boolean + // flag 'isGetEntityIdDirectly, determines whether an entity to process is + // determined by a greedy heuristic search or by just matching the Chain Id + // directly against the entity Id tag. Setting the default value to 'false' + // utilise the heuristic search which always produces correct mappings but + // less optimised processing, where as changing the value to 'true' + // optimises performance but might result to incorrect mapping in some cases + // where SIFTS mappings are wrongly swapped between different chains. + boolean isGetEntityIdDirectly = false; + if (isGetEntityIdDirectly) { - if (!entity.getEntityId().equalsIgnoreCase(id)) + List entities = siftsEntry.getEntity(); + for (Entity entity : entities) { - continue; + if (!entity.getEntityId().equalsIgnoreCase(id)) + { + continue; + } + return entity; } - return entity; } Entity entity = getEntityByMostOptimalMatchedId(id); if (entity != null) @@ -788,8 +840,7 @@ public class SiftsClient implements SiftsClientI */ public Entity getEntityByMostOptimalMatchedId(String chainId) { - System.out - .println("--------------> advanced greedy entityId matching block entered.."); + // System.out.println("---> advanced greedy entityId matching block entered.."); List entities = siftsEntry.getEntity(); SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()]; int count = 0; @@ -823,8 +874,8 @@ public class SiftsClient implements SiftsClientI ++count; } Arrays.sort(sPojo, Collections.reverseOrder()); - System.out.println("highest matched entity : " + sPojo[0].entityId); - System.out.println("highest matched pid : " + sPojo[0].pid); + // System.out.println("highest matched entity : " + sPojo[0].entityId); + // System.out.println("highest matched pid : " + sPojo[0].pid); if (sPojo[0].entityId != null) { @@ -897,7 +948,8 @@ public class SiftsClient implements SiftsClientI // output mappings StringBuffer output = new StringBuffer(); output.append(NEWLINE); - output.append("Sequence ⟷ Structure mapping details").append(NEWLINE); + output.append("Sequence \u27f7 Structure mapping details").append( + NEWLINE); output.append("Method: SIFTS"); output.append(NEWLINE).append(NEWLINE); @@ -984,9 +1036,9 @@ public class SiftsClient implements SiftsClientI output.append(NEWLINE).append(NEWLINE); } float pid = (float) matchedSeqCount / seqRes.length() * 100; - if (pid < 2) + if (pid < SiftsSettings.getFailSafePIDThreshold()) { - throw new SiftsException("Low PID detected for SIFTs mapping..."); + throw new SiftsException(">>> Low PID detected for SIFTs mapping..."); } output.append("Length of alignment = " + seqRes.length()).append( NEWLINE);