From 2c582d859f37cd06681f9f47a5e6eea47ac0b9ef Mon Sep 17 00:00:00 2001 From: tcofoegbu Date: Wed, 27 Jan 2016 12:24:27 +0000 Subject: [PATCH] JAL-1479 fixed off-by-one error in SIFTS mapping output and added the flag 'isGetEntityIdDirectly' to control how an entity to process is determined. --- src/jalview/ws/sifts/SiftsClient.java | 156 ++++++++++++++++++--------------- 1 file changed, 87 insertions(+), 69 deletions(-) diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index f866127f..6e7b988 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -103,10 +103,6 @@ public class SiftsClient implements SiftsClientI private final static String NEWLINE = System.lineSeparator(); - // private final static int CACHE_THRESHOLD_IN_DAYS = 2; - // - // private final static int FAIL_SAFE_PID_THRESHOLD = 30; - private String curSourceDBRef; private HashSet curDBRefAccessionIdsString; @@ -284,6 +280,10 @@ public class SiftsClient implements SiftsClientI */ public static File downloadSiftsFile(String pdbId) throws SiftsException { + if (pdbId.contains(".cif")) + { + pdbId = pdbId.replace(".cif", ""); + } String siftFile = pdbId + ".xml.gz"; String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; String downloadedSiftsFile = SiftsSettings.getSiftDownloadDirectory() @@ -530,7 +530,7 @@ public class SiftsClient implements SiftsClientI { continue; } - if (currSeqIndex > seq.getStart() && currSeqIndex <= seq.getEnd()) + if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd()) { int resNum; try @@ -596,12 +596,17 @@ public class SiftsClient implements SiftsClientI int orignalSeqStart = seq.getStart(); if (orignalSeqStart >= 1) { - int subSeqStart = seqStart - orignalSeqStart; + int subSeqStart = (seqStart >= orignalSeqStart) ? seqStart + - orignalSeqStart : 0; int subSeqEnd = seqEnd - (orignalSeqStart - 1); subSeqEnd = originalSeq.length() < subSeqEnd ? 
originalSeq.length() : subSeqEnd; matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd); } + else + { + matchedSeq = originalSeq.substring(1, originalSeq.length()); + } } StringBuilder targetStrucSeqs = new StringBuilder(); @@ -613,13 +618,13 @@ public class SiftsClient implements SiftsClientI if (os != null) { MappingOutputPojo mop = new MappingOutputPojo(); - mop.setSeqStart(seqStart); - mop.setSeqEnd(seqEnd); + mop.setSeqStart(pdbStart); + mop.setSeqEnd(pdbEnd); mop.setSeqName(seq.getName()); mop.setSeqResidue(matchedSeq); - mop.setStrStart(pdbStart); - mop.setStrEnd(pdbEnd); + mop.setStrStart(seqStart); + mop.setStrEnd(seqEnd); mop.setStrName(structId); mop.setStrResidue(targetStrucSeqs.toString()); @@ -630,6 +635,58 @@ public class SiftsClient implements SiftsClientI } /** + * + * @param chainId + * Target chain to populate mapping of its atom positions. + * @param mapping + * Two dimension array of residue index versus atom position + * @throws IllegalArgumentException + * Thrown if chainId or mapping is null + */ + void populateAtomPositions(String chainId, + HashMap mapping) throws IllegalArgumentException + { + PDBChain chain = pdb.findChain(chainId); + if (chain == null || mapping == null) + { + throw new IllegalArgumentException( + "Chain id or mapping must not be null."); + } + for (int[] map : mapping.values()) + { + if (map[PDB_RES_POS] != UNASSIGNED) + { + map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); + } + } + } + + /** + * + * @param residueIndex + * The residue index used for the search + * @param atoms + * A collection of Atom to search + * @return atom position for the given residue index + */ + int getAtomIndex(int residueIndex, Collection atoms) + { + if (atoms == null) + { + throw new IllegalArgumentException( + "atoms collection must not be null!"); + } + for (Atom atom : atoms) + { + if (atom.resNumber == residueIndex) + { + return atom.atomIndex; + } + } + return UNASSIGNED; + } + + /** * Checks if the residue 
instance is marked 'Not_observed' or not * * @param residue @@ -716,69 +773,32 @@ public class SiftsClient implements SiftsClientI } } - /** - * - * @param chainId - * Target chain to populate mapping of its atom positions. - * @param mapping - * Two dimension array of residue index versus atom position - * @throws IllegalArgumentException - * Thrown if chainId or mapping is null - */ - void populateAtomPositions(String chainId, HashMap mapping) - throws IllegalArgumentException - { - PDBChain chain = pdb.findChain(chainId); - if (chain == null || mapping == null) - { - throw new IllegalArgumentException( - "Chain id or mapping must not be null."); - } - for (int[] map : mapping.values()) - { - if (map[PDB_RES_POS] != UNASSIGNED) - { - map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); - } - } - } - /** - * - * @param residueIndex - * The residue index used for the search - * @param atoms - * A collection of Atom to search - * @return atom position for the given residue index - */ - int getAtomIndex(int residueIndex, Collection atoms) - { - if (atoms == null) - { - throw new IllegalArgumentException( - "atoms collection must not be null!"); - } - for (Atom atom : atoms) - { - if (atom.resNumber == residueIndex) - { - return atom.atomIndex; - } - } - return UNASSIGNED; - } @Override public Entity getEntityById(String id) throws SiftsException { - List entities = siftsEntry.getEntity(); - for (Entity entity : entities) + // Sometimes SIFTS mappings are wrongly swapped between different chains of + // a PDB entry. This results in wrong mappings being generated. The boolean + // flag 'isGetEntityIdDirectly' determines whether an entity to process is + // determined by a greedy heuristic search or by just matching the Chain Id + // directly against the entity Id tag. 
Setting the default value to 'false' + // utilises the heuristic search which always produces correct mappings but + // less optimised processing, whereas changing the value to 'true' + // optimises performance but might result in incorrect mapping in some cases + // where SIFTS mappings are wrongly swapped between different chains. + boolean isGetEntityIdDirectly = false; + if (isGetEntityIdDirectly) { - if (!entity.getEntityId().equalsIgnoreCase(id)) + List entities = siftsEntry.getEntity(); + for (Entity entity : entities) { - continue; + if (!entity.getEntityId().equalsIgnoreCase(id)) + { + continue; + } + return entity; } - return entity; } Entity entity = getEntityByMostOptimalMatchedId(id); if (entity != null) @@ -798,8 +818,7 @@ public class SiftsClient implements SiftsClientI */ public Entity getEntityByMostOptimalMatchedId(String chainId) { - System.out - .println("--------------> advanced greedy entityId matching block entered.."); + // System.out.println("---> advanced greedy entityId matching block entered.."); List entities = siftsEntry.getEntity(); SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()]; int count = 0; @@ -996,8 +1015,7 @@ public class SiftsClient implements SiftsClientI float pid = (float) matchedSeqCount / seqRes.length() * 100; if (pid < SiftsSettings.getFailSafePIDThreshold()) { - throw new SiftsException( -">>> Low PID detected for SIFTs mapping..."); + throw new SiftsException(">>> Low PID detected for SIFTs mapping..."); } output.append("Length of alignment = " + seqRes.length()).append( NEWLINE); -- 1.7.10.2