X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=6e7b9880284ebbf81dca2b24c8fe4e7645042512;hb=2c582d859f37cd06681f9f47a5e6eea47ac0b9ef;hp=176c511acdedb5a4f997c65adf771f56f3f9536d;hpb=9b6d2ddd33ba5e479953700040e1129847e47a06;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 176c511..6e7b988 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -24,6 +24,7 @@ import jalview.analysis.AlignSeq; import jalview.api.DBRefEntryI; import jalview.api.SiftsClientI; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; import jalview.datamodel.SequenceI; import jalview.schemes.ResidueProperties; import jalview.structure.StructureMapping; @@ -34,6 +35,7 @@ import jalview.xml.binding.sifts.Entry.Entity.Segment; import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; import jalview.xml.binding.sifts.Entry.ListDB.Db; import java.io.File; @@ -45,10 +47,18 @@ import java.io.InputStream; import java.io.PrintStream; import java.net.URL; import java.net.URLConnection; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedHashMap; import java.util.List; +import java.util.TreeMap; import java.util.zip.GZIPInputStream; import javax.xml.bind.JAXBContext; @@ -59,67 +69,106 @@ import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; +import MCview.Atom; +import MCview.PDBChain; +import MCview.PDBfile; + public class SiftsClient implements SiftsClientI { private Entry siftsEntry; + private PDBfile pdb; + private String pdbId; private String structId; private String segStartEnd; + private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT; + private static final int BUFFER_SIZE = 4096; - private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; + public static final int UNASSIGNED = -1; - public static final String DEFAULT_SIFTS_DOWNLOAD_DIR = System - .getProperty("user.home") - + File.separatorChar - + ".sifts_downloads" + File.separatorChar; + private static final int PDB_RES_POS = 0; - public static final String SIFTS_DOWNLOAD_DIR = jalview.bin.Cache - .getDefault("sifts_download_dir", DEFAULT_SIFTS_DOWNLOAD_DIR); + private static final int PDB_ATOM_POS = 1; + + private static final String NOT_FOUND = "Not_Found"; + + private static final String NOT_OBSERVED = "Not_Observed"; + + private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; private final static String NEWLINE = System.lineSeparator(); + private String curSourceDBRef; + + private HashSet curDBRefAccessionIdsString; + + public enum CoordinateSys + { + UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe"); + private String name; + + private CoordinateSys(String name) + { + this.name = name; + } + + public String getName() + { + return name; + } + }; + + public enum ResidueDetailType + { + NAME_SEC_STRUCTURE("nameSecondaryStructure"), CODE_SEC_STRUCTURE( + "codeSecondaryStructure"), ANNOTATION("Annotation"); + private String code; + + private ResidueDetailType(String code) + { + this.code = code; + } + + public String getCode() + { + return code; + } + }; + /** - * Fetch SIFTs file for the given PDB Id and construct an instance of + * Fetch SIFTs file for the given PDBfile and construct an instance of * SiftsClient * * @param pdbId + * @throws SiftsException */ - public SiftsClient(String pdbId) + public SiftsClient(PDBfile pdb) throws SiftsException { - this.pdbId = pdbId; - try - { - File siftsFile = getSiftsFile(pdbId); - siftsEntry = parseSIFTs(siftsFile); - } catch (Exception e) - { - e.printStackTrace(); - } + this.pdb = pdb; + this.pdbId = pdb.id; + File siftsFile = getSiftsFile(pdbId); + siftsEntry = parseSIFTs(siftsFile); } /** - * Construct an instance of SiftsClient using the supplied SIFTs file - - * the SIFTs file should correspond to the given PDB Id + * Construct an instance of SiftsClient using the supplied SIFTs file. Note: + * The SIFTs file should correspond to the PDB Id in PDBfile instance * * @param pdbId * @param siftsFile + * @throws SiftsException + * @throws Exception */ - public SiftsClient(String pdbId, File siftsFile) + public SiftsClient(PDBfile pdb, File siftsFile) throws SiftsException { - this.pdbId = pdbId; - try - { - siftsEntry = parseSIFTs(siftsFile); - } catch (Exception e) - { - e.printStackTrace(); - } - + this.pdb = pdb; + this.pdbId = pdb.id; + siftsEntry = parseSIFTs(siftsFile); } /** @@ -131,14 +180,13 @@ public class SiftsClient implements SiftsClientI * @throws Exception * if a problem occurs while parsing the SIFTs XML */ - private Entry parseSIFTs(File siftFile) throws Exception + private Entry parseSIFTs(File siftFile) throws SiftsException { - try + try (InputStream in = new FileInputStream(siftFile); + GZIPInputStream gzis = new GZIPInputStream(in);) { - System.out.println("File : " + siftFile.getAbsolutePath()); + // System.out.println("File : " + siftFile.getAbsolutePath()); JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts"); - InputStream in = new FileInputStream(siftFile); - GZIPInputStream gzis = new GZIPInputStream(in); XMLStreamReader streamReader = XMLInputFactory.newInstance() .createXMLStreamReader(gzis); Unmarshaller um = jc.createUnmarshaller(); @@ -146,38 +194,49 @@ public class SiftsClient implements SiftsClientI } catch (JAXBException e) { e.printStackTrace(); + throw new SiftsException(e.getMessage()); } catch (FileNotFoundException e) { e.printStackTrace(); + throw new SiftsException(e.getMessage()); } catch (XMLStreamException e) { e.printStackTrace(); + throw new SiftsException(e.getMessage()); } catch (FactoryConfigurationError e) { e.printStackTrace(); + throw new SiftsException(e.getMessage()); } catch (IOException e) { e.printStackTrace(); + throw new SiftsException(e.getMessage()); } - throw new Exception("Error parsing siftFile"); } /** - * Get a SIFTs XML file for a given PDB Id + * Get a SIFTs XML file for a given PDB Id from Cache or download from FTP + * repository if not found in cache * * @param pdbId * @return SIFTs XML file + * @throws SiftsException */ - public static File getSiftsFile(String pdbId) + public static File getSiftsFile(String pdbId) throws SiftsException { - File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase() - + ".xml.gz"); + File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() + + pdbId.toLowerCase() + ".xml.gz"); if (siftsFile.exists()) { - // TODO it may be worth performing a timestamp age check to determine if a - // new SIFTs file should be re-downloaded as SIFTs entries are usually - // updated weekly + // The line below is required for unit testing... don't comment it out!!! System.out.println(">>> SIFTS File already downloaded for " + pdbId); + + if (isFileOlderThanThreshold(siftsFile, + SiftsSettings.getCacheThresholdInDays())) + { + // System.out.println("Downloaded file is out of date, hence re-downloading..."); + siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + } return siftsFile; } siftsFile = downloadSiftsFile(pdbId.toLowerCase()); @@ -185,24 +244,59 @@ public class SiftsClient implements SiftsClientI } /** - * Download a SIFTs XML file for a given PDB Id + * This method enables checking if a cached file has exceeded a certain + * threshold(in days) + * + * @param file + * the cached file + * @param noOfDays + * the threshold in days + * @return + */ + public static boolean isFileOlderThanThreshold(File file, int noOfDays) + { + Path filePath = file.toPath(); + BasicFileAttributes attr; + int diffInDays = 0; + try + { + attr = Files.readAttributes(filePath, BasicFileAttributes.class); + diffInDays = (int) ((new Date().getTime() - attr.lastModifiedTime() + .toMillis()) / (1000 * 60 * 60 * 24)); + // System.out.println("Diff in days : " + diffInDays); + } catch (IOException e) + { + e.printStackTrace(); + } + return noOfDays <= diffInDays; + } + + /** + * Download a SIFTs XML file for a given PDB Id from an FTP repository * * @param pdbId * @return downloaded SIFTs XML file + * @throws SiftsException */ - public static File downloadSiftsFile(String pdbId) + public static File downloadSiftsFile(String pdbId) throws SiftsException { + if (pdbId.contains(".cif")) + { + pdbId = pdbId.replace(".cif", ""); + } String siftFile = pdbId + ".xml.gz"; String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; - String downloadedSiftsFile = SIFTS_DOWNLOAD_DIR + siftFile; - File siftsDownloadDir = new File(SIFTS_DOWNLOAD_DIR); + String downloadedSiftsFile = SiftsSettings.getSiftDownloadDirectory() + + siftFile; + File siftsDownloadDir = new File( + SiftsSettings.getSiftDownloadDirectory()); if (!siftsDownloadDir.exists()) { siftsDownloadDir.mkdirs(); } try { - System.out.println(">> Download ftp url : " + siftsFileFTPURL); + // System.out.println(">> Download ftp url : " + siftsFileFTPURL); URL url = new URL(siftsFileFTPURL); URLConnection conn = url.openConnection(); InputStream inputStream = conn.getInputStream(); @@ -216,10 +310,10 @@ public class SiftsClient implements SiftsClientI } outputStream.close(); inputStream.close(); - System.out.println(">>> File downloaded : " + downloadedSiftsFile); + // System.out.println(">>> File downloaded : " + downloadedSiftsFile); } catch (IOException ex) { - ex.printStackTrace(); + throw new SiftsException(ex.getMessage()); } return new File(downloadedSiftsFile); } @@ -233,8 +327,8 @@ public class SiftsClient implements SiftsClientI */ public static boolean deleteSiftsFileByPDBId(String pdbId) { - File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase() - + ".xml.gz"); + File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() + + pdbId.toLowerCase() + ".xml.gz"); if (siftsFile.exists()) { return siftsFile.delete(); @@ -242,7 +336,6 @@ public class SiftsClient implements SiftsClientI return true; } - /** * Get a valid SIFTs DBRef for the given sequence current SIFTs entry * @@ -266,14 +359,6 @@ public class SiftsClient implements SiftsClientI DBRefEntry[] dbRefs = seq.getDBRefs(); if (dbRefs == null || dbRefs.length < 1) { - final SequenceI[] seqs = new SequenceI[] { seq }; - new jalview.ws.DBRefFetcher(seqs, null, null, null, false) - .fetchDBRefs(true); - dbRefs = seq.getDBRefs(); - } - - if (dbRefs == null || dbRefs.length < 1) - { throw new SiftsException("Could not get source DB Ref"); } @@ -285,8 +370,8 @@ public class SiftsClient implements SiftsClientI continue; } if (isFoundInSiftsEntry(dbRef.getAccessionId()) - && (dbRef.getSource().equalsIgnoreCase("uniprot") || dbRef - .getSource().equalsIgnoreCase("pdb"))) + && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef + .getSource().equalsIgnoreCase(DBRefSource.PDB))) { return dbRef; } @@ -299,10 +384,9 @@ public class SiftsClient implements SiftsClientI throw new SiftsException("Could not get source DB Ref"); } - /** - * Check that the DBRef Entry is properly populated and is available in the - * instantiated SIFTs Entry + * Check that the DBRef Entry is properly populated and is available in this + * SiftClient instance * * @param entry * - DBRefEntry to validate @@ -312,7 +396,6 @@ public class SiftsClient implements SiftsClientI { return entry != null && entry.getAccessionId() != null && isFoundInSiftsEntry(entry.getAccessionId()); - // & entry.getStartRes() > 0; } @Override @@ -359,51 +442,51 @@ public class SiftsClient implements SiftsClientI mappingDetails.append(NEWLINE); } }; - int[][] mapping = getGreedyMapping(chain, seq, ps); + HashMap mapping = getGreedyMapping(chain, seq, ps); String mappingOutput = mappingDetails.toString(); - return new StructureMapping(seq, pdbFile, pdbId, chain, mapping, - mappingOutput); + StructureMapping siftsMapping = new StructureMapping(seq, pdbFile, + pdbId, chain, mapping, mappingOutput); + return siftsMapping; } @Override - public int[][] getGreedyMapping(String entityId, SequenceI seq, - java.io.PrintStream os) - throws SiftsException - { - int matchedResStart = -1; - int matchedResEnd = -1; - int counter = 0; - int pdbStart = -1; - int pdbEnd = -1; - int sStart = -1; - int sEnd = -1; - boolean startDetected = false; - + public HashMap getGreedyMapping(String entityId, + SequenceI seq, java.io.PrintStream os) throws SiftsException + { + ArrayList omitNonObserved = new ArrayList(); + int nonObservedShiftIndex = 0; System.out.println("Generating mappings for : " + entityId); Entity entity = null; entity = getEntityById(entityId); - String seqStr = AlignSeq.extractGaps(jalview.util.Comparison.GapChars, - seq.getSequenceAsString()); - int mapping[][] = new int[seqStr.length() + seq.getStart()][2]; + String originalSeq = AlignSeq.extractGaps( + jalview.util.Comparison.GapChars, seq.getSequenceAsString()); + HashMap mapping = new HashMap(); DBRefEntryI sourceDBRef = seq.getSourceDBRef(); if (sourceDBRef == null) { sourceDBRef = getValidSourceDBRef(seq); - // TODO update sequence start/end with sourceDBRef start/end - // seq.setStart(sourceDBRef.getStartRes()); - // seq.setEnd(sourceDBRef.getEndRes()); + // TODO ensure sequence start/end is in the same coordinate system and + // consistent with the choosen sourceDBRef } - String crossRefAccessionId = sourceDBRef.getAccessionId(); - int count = 0; - for (int residue[] : mapping) + // set sequence coordinate system - default value is UniProt + if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB)) + { + seqCoordSys = CoordinateSys.PDB; + } + + HashSet dbRefAccessionIdsString = new HashSet(); + for (DBRefEntry dbref : seq.getDBRefs()) { - residue[1] = count++; - residue[0] = -1; + dbRefAccessionIdsString.add(dbref.getAccessionId().toLowerCase()); } - - LinkedHashMap resNumMap = new LinkedHashMap(); + dbRefAccessionIdsString.add(sourceDBRef.getAccessionId().toLowerCase()); + + curDBRefAccessionIdsString = dbRefAccessionIdsString; + curSourceDBRef = sourceDBRef.getAccessionId(); + + TreeMap resNumMap = new TreeMap(); List segments = entity.getSegment(); for (Segment segment : segments) { @@ -413,72 +496,117 @@ public class SiftsClient implements SiftsClientI List residues = segment.getListResidue().getResidue(); for (Residue residue : residues) { - int refDbResNum = -1; + int currSeqIndex = UNASSIGNED; List cRefDbs = residue.getCrossRefDb(); + CrossRefDb pdbRefDb = null; for (CrossRefDb cRefDb : cRefDbs) { - if (cRefDb.getDbAccessionId().equalsIgnoreCase( - crossRefAccessionId)) + if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB)) { - refDbResNum = Integer.valueOf(cRefDb.getDbResNum()); + pdbRefDb = cRefDb; + } + if (cRefDb.getDbCoordSys() + .equalsIgnoreCase(seqCoordSys.getName()) + && isAccessionMatched(cRefDb.getDbAccessionId())) + { + String resNumIndexString = cRefDb.getDbResNum() + .equalsIgnoreCase("None") ? String.valueOf(UNASSIGNED) + : cRefDb.getDbResNum(); + try + { + currSeqIndex = Integer.valueOf(resNumIndexString); + } catch (NumberFormatException nfe) + { + currSeqIndex = Integer.valueOf(resNumIndexString + .split("[a-zA-Z]")[0]); + } + if (pdbRefDb != null) + { + break;// exit loop if pdb and uniprot are already found + } } } - if (refDbResNum == -1) + if (currSeqIndex == UNASSIGNED) { continue; } - int loopCount = 0; - for (int[] x : mapping) + if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd()) { - if (loopCount > seq.getStart() && x[1] == refDbResNum) + int resNum; + try + { + resNum = (pdbRefDb == null) ? Integer.valueOf(residue + .getDbResNum()) : Integer.valueOf(pdbRefDb + .getDbResNum()); + } catch (NumberFormatException nfe) + { + resNum = (pdbRefDb == null) ? Integer.valueOf(residue + .getDbResNum()) : Integer.valueOf(pdbRefDb + .getDbResNum().split("[a-zA-Z]")[0]); + } + + if (isResidueObserved(residue) + || seqCoordSys == CoordinateSys.UNIPROT) { - int resNum = Integer.valueOf(residue.getDbResNum()); - x[0] = resNum; char resCharCode = ResidueProperties - .getSingleCharacterCode(residue.getDbResName()); - resNumMap.put(resNum, String.valueOf(resCharCode)); + .getSingleCharacterCode(ResidueProperties + .getCanonicalAminoAcid(residue.getDbResName())); + resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); } - ++loopCount; + else + { + omitNonObserved.add(currSeqIndex); + ++nonObservedShiftIndex; + } + mapping.put(currSeqIndex - nonObservedShiftIndex, new int[] { + Integer.valueOf(resNum), UNASSIGNED }); } } } - - for (int[] x : mapping) + try { - if (!startDetected && x[0] > -1) - { - matchedResStart = counter; - // System.out.println(matchedResStart); - startDetected = true; - } + populateAtomPositions(entityId, mapping); + } catch (Exception e) + { + e.printStackTrace(); + } + if (seqCoordSys == CoordinateSys.UNIPROT) + { + padWithGaps(resNumMap, omitNonObserved); + } + int seqStart = UNASSIGNED; + int seqEnd = UNASSIGNED; + int pdbStart = UNASSIGNED; + int pdbEnd = UNASSIGNED; - if (startDetected && x[0] == -1) - { - matchedResEnd = counter; - } - ++counter; + Integer[] keys = mapping.keySet().toArray(new Integer[0]); + Arrays.sort(keys); + if (keys.length < 1) + { + throw new SiftsException(">>> Empty SIFTS mapping generated!!"); } + seqStart = keys[0]; + seqEnd = keys[keys.length - 1]; - String matchedSeqStr = seqStr; - if (matchedResStart != -1) + String matchedSeq = originalSeq; + if (seqStart != UNASSIGNED) { - matchedResEnd = (matchedResEnd == -1) ? counter : matchedResEnd; - pdbStart = mapping[matchedResStart][0]; - pdbEnd = mapping[matchedResEnd - 1][0]; - sStart = mapping[matchedResStart][1]; - sEnd = mapping[matchedResEnd - 1][1]; - int seqStart = seq.getStart(); - if (seqStart > 1) + pdbStart = mapping.get(seqStart)[PDB_RES_POS]; + pdbEnd = mapping.get(seqEnd)[PDB_RES_POS]; + int orignalSeqStart = seq.getStart(); + if (orignalSeqStart >= 1) { - matchedResStart = matchedResStart - seqStart; - matchedResEnd = matchedResEnd - seqStart; + int subSeqStart = (seqStart >= orignalSeqStart) ? seqStart + - orignalSeqStart : 0; + int subSeqEnd = seqEnd - (orignalSeqStart - 1); + subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length() + : subSeqEnd; + matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd); } else { - --matchedResStart; - --matchedResEnd; + matchedSeq = originalSeq.substring(1, originalSeq.length()); } - matchedSeqStr = seqStr.substring(matchedResStart, matchedResEnd); } StringBuilder targetStrucSeqs = new StringBuilder(); @@ -487,54 +615,278 @@ public class SiftsClient implements SiftsClientI targetStrucSeqs.append(res); } - try + if (os != null) + { + MappingOutputPojo mop = new MappingOutputPojo(); + mop.setSeqStart(pdbStart); + mop.setSeqEnd(pdbEnd); + mop.setSeqName(seq.getName()); + mop.setSeqResidue(matchedSeq); + + mop.setStrStart(seqStart); + mop.setStrEnd(seqEnd); + mop.setStrName(structId); + mop.setStrResidue(targetStrucSeqs.toString()); + + mop.setType("pep"); + os.print(getMappingOutput(mop).toString()); + } + return mapping; + } + + /** + * + * @param chainId + * Target chain to populate mapping of its atom positions. + * @param mapping + * Two dimension array of residue index versus atom position + * @throws IllegalArgumentException + * Thrown if chainId or mapping is null + */ + void populateAtomPositions(String chainId, + HashMap mapping) throws IllegalArgumentException + { + PDBChain chain = pdb.findChain(chainId); + if (chain == null || mapping == null) + { + throw new IllegalArgumentException( + "Chain id or mapping must not be null."); + } + for (int[] map : mapping.values()) + { + if (map[PDB_RES_POS] != UNASSIGNED) + { + map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); + } + } + } + + /** + * + * @param residueIndex + * The residue index used for the search + * @param atoms + * A collection of Atom to search + * @return atom position for the given residue index + */ + int getAtomIndex(int residueIndex, Collection atoms) + { + if (atoms == null) { - if (os != null) + throw new IllegalArgumentException( + "atoms collection must not be null!"); + } + for (Atom atom : atoms) + { + if (atom.resNumber == residueIndex) { - MappingOutputPojo mop = new MappingOutputPojo(); - mop.setSeqStart(sStart); - mop.setSeqEnd(sEnd); - mop.setSeqName(seq.getName()); - mop.setSeqResidue(matchedSeqStr); - - mop.setStrStart(pdbStart); - mop.setStrEnd(pdbEnd); - mop.setStrName(structId); - mop.setStrResidue(targetStrucSeqs.toString()); - - mop.setType("pep"); - os.print(getMappingOutput(mop).toString()); + return atom.atomIndex; } - } catch (Exception ex) + } + return UNASSIGNED; + } + + /** + * Checks if the residue instance is marked 'Not_observed' or not + * + * @param residue + * @return + */ + private boolean isResidueObserved(Residue residue) + { + HashSet annotations = getResidueAnnotaitons(residue, + ResidueDetailType.ANNOTATION); + if (annotations == null || annotations.isEmpty()) { - ex.printStackTrace(); + return true; } - return mapping; + for (String annotation : annotations) + { + if (annotation.equalsIgnoreCase(NOT_OBSERVED)) + { + return false; + } + } + return true; + } + + /** + * Get annotation String for a given residue and annotation type + * + * @param residue + * @param type + * @return + */ + private HashSet getResidueAnnotaitons(Residue residue, + ResidueDetailType type) + { + HashSet foundAnnotations = new HashSet(); + List resDetails = residue.getResidueDetail(); + for (ResidueDetail resDetail : resDetails) + { + if (resDetail.getProperty().equalsIgnoreCase(type.getCode())) + { + foundAnnotations.add(resDetail.getContent()); + } + } + return foundAnnotations; } @Override - public boolean isFoundInSiftsEntry(String accessionId) + public boolean isAccessionMatched(String accession) + { + boolean isStrictMatch = true; + return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession) + : curDBRefAccessionIdsString.contains(accession.toLowerCase()); + } + + private boolean isFoundInSiftsEntry(String accessionId) { return accessionId != null && getAllMappingAccession().contains(accessionId); } + /** + * Pad omitted residue positions in PDB sequence with gaps + * + * @param resNumMap + */ + void padWithGaps(TreeMap resNumMap, + ArrayList omitNonObserved) + { + if (resNumMap == null || resNumMap.isEmpty()) + { + return; + } + Integer[] keys = resNumMap.keySet().toArray(new Integer[0]); + Arrays.sort(keys); + int firstIndex = keys[0]; + int lastIndex = keys[keys.length - 1]; + System.out.println("Min value " + firstIndex); + System.out.println("Max value " + lastIndex); + for (int x = firstIndex; x <= lastIndex; x++) + { + if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x)) + { + resNumMap.put(x, "-"); + } + } + } + + @Override public Entity getEntityById(String id) throws SiftsException { - List entities = siftsEntry.getEntity(); - for (Entity entity : entities) + // Sometimes SIFTS mappings are wrongly swapped between different chains of + // a PDB entry. This results to wrong mappings being generated. The boolean + // flag 'isGetEntityIdDirectly, determines whether an entity to process is + // determined by a greedy heuristic search or by just matching the Chain Id + // directly against the entity Id tag. Setting the default value to 'false' + // utilise the heuristic search which always produces correct mappings but + // less optimised processing, where as changing the value to 'true' + // optimises performance but might result to incorrect mapping in some cases + // where SIFTS mappings are wrongly swapped between different chains. + boolean isGetEntityIdDirectly = false; + if (isGetEntityIdDirectly) { - if (!entity.getEntityId().equalsIgnoreCase(id)) + List entities = siftsEntry.getEntity(); + for (Entity entity : entities) { - continue; + if (!entity.getEntityId().equalsIgnoreCase(id)) + { + continue; + } + return entity; } + } + Entity entity = getEntityByMostOptimalMatchedId(id); + if (entity != null) + { return entity; } throw new SiftsException("Entity " + id + " not found"); } + /** + * This method was added because EntityId is NOT always equal to ChainId. + * Hence, it provides the logic to greedily detect the "true" Entity for a + * given chainId where discrepancies exist. + * + * @param chainId + * @return + */ + public Entity getEntityByMostOptimalMatchedId(String chainId) + { + // System.out.println("---> advanced greedy entityId matching block entered.."); + List entities = siftsEntry.getEntity(); + SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()]; + int count = 0; + for (Entity entity : entities) + { + sPojo[count] = new SiftsEntitySortPojo(); + sPojo[count].entityId = entity.getEntityId(); + + List segments = entity.getSegment(); + for (Segment segment : segments) + { + List residues = segment.getListResidue().getResidue(); + for (Residue residue : residues) + { + List cRefDbs = residue.getCrossRefDb(); + for (CrossRefDb cRefDb : cRefDbs) + { + if (!cRefDb.getDbSource().equalsIgnoreCase("PDB")) + { + continue; + } + ++sPojo[count].resCount; + if (cRefDb.getDbChainId().equalsIgnoreCase(chainId)) + { + ++sPojo[count].chainIdFreq; + } + } + } + } + sPojo[count].pid = 100 * (sPojo[count].chainIdFreq / sPojo[count].resCount); + ++count; + } + Arrays.sort(sPojo, Collections.reverseOrder()); + System.out.println("highest matched entity : " + sPojo[0].entityId); + System.out.println("highest matched pid : " + sPojo[0].pid); + + if (sPojo[0].entityId != null) + { + for (Entity entity : entities) + { + if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId)) + { + continue; + } + return entity; + } + } + return null; + } + + public class SiftsEntitySortPojo implements + Comparable + { + public String entityId; + + public int chainIdFreq; + + public int pid; + + public int resCount; + + @Override + public int compareTo(SiftsEntitySortPojo o) + { + return this.pid - o.pid; + } + } + @Override public String[] getEntryDBs() { @@ -551,6 +903,7 @@ public class SiftsClient implements SiftsClientI @Override public StringBuffer getMappingOutput(MappingOutputPojo mp) + throws SiftsException { String seqRes = mp.getSeqResidue(); String seqName = mp.getSeqName(); @@ -561,20 +914,20 @@ public class SiftsClient implements SiftsClientI String strName = mp.getStrName(); int pdbStart = mp.getStrStart(); int pdbEnd = mp.getStrEnd(); - + String type = mp.getType(); - + int maxid = (seqName.length() >= strName.length()) ? seqName.length() : strName.length(); int len = 72 - maxid - 1; - // int nochunks = 2;// mp.getWrapHeight(); int nochunks = ((seqRes.length()) / len) + ((seqRes.length()) % len > 0 ? 1 : 0); // output mappings StringBuffer output = new StringBuffer(); output.append(NEWLINE); - output.append("Sequence ⟷ Structure mapping details:"); + output.append("Sequence ⟷ Structure mapping details").append(NEWLINE); + output.append("Method: SIFTS"); output.append(NEWLINE).append(NEWLINE); output.append(new Format("%" + maxid + "s").form(seqName)); @@ -590,8 +943,8 @@ public class SiftsClient implements SiftsClientI output.append(" - "); output.append(String.valueOf(pdbEnd)); output.append(NEWLINE).append(NEWLINE); - - float pid = 0; + + int matchedSeqCount = 0; for (int j = 0; j < nochunks; j++) { // Print the first aligned sequence @@ -612,31 +965,38 @@ public class SiftsClient implements SiftsClientI // Print out the matching chars for (int i = 0; i < len; i++) { - if ((i + (j * len)) < seqRes.length()) + try { - if (seqRes.charAt(i + (j * len)) == strRes.charAt(i + (j * len)) - && !jalview.util.Comparison.isGap(seqRes.charAt(i - + (j * len)))) - { - pid++; - output.append("|"); - } - else if (type.equals("pep")) + if ((i + (j * len)) < seqRes.length()) { - if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)), - strRes.charAt(i + (j * len))) > 0) + if (seqRes.charAt(i + (j * len)) == strRes + .charAt(i + (j * len)) + && !jalview.util.Comparison.isGap(seqRes.charAt(i + + (j * len)))) { - output.append("."); + matchedSeqCount++; + output.append("|"); + } + else if (type.equals("pep")) + { + if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)), + strRes.charAt(i + (j * len))) > 0) + { + output.append("."); + } + else + { + output.append(" "); + } } else { output.append(" "); } } - else - { - output.append(" "); - } + } catch (IndexOutOfBoundsException e) + { + continue; } } // Now print the second aligned sequence @@ -652,15 +1012,18 @@ public class SiftsClient implements SiftsClientI } output.append(NEWLINE).append(NEWLINE); } - pid = pid / (seqRes.length()) * 100; - output.append("Length of alignment = " + seqRes.length()) - .append(NEWLINE); + float pid = (float) matchedSeqCount / seqRes.length() * 100; + if (pid < SiftsSettings.getFailSafePIDThreshold()) + { + throw new SiftsException(">>> Low PID detected for SIFTs mapping..."); + } + output.append("Length of alignment = " + seqRes.length()).append( + NEWLINE); output.append(new Format("Percentage ID = %2.2f").form(pid)); output.append(NEWLINE); - output.append("Mapping method: SIFTS").append(NEWLINE); return output; } - + @Override public int getEntityCount() { @@ -696,4 +1059,5 @@ public class SiftsClient implements SiftsClientI { return siftsEntry.getDbVersion(); } + }