X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=27db6041212d967b45ed1d9de1b12db7f91ab0c1;hb=205e37313c0bb47069ab0e0579b9b65e68a8dfdc;hp=245d38ff580bfbc998452fc6578930e36c185f2e;hpb=8449c7161636af09d658905fe3affbc84fd10150;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 245d38f..27db604 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -26,8 +26,11 @@ import jalview.api.SiftsClientI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.SequenceI; +import jalview.io.StructureFile; import jalview.schemes.ResidueProperties; import jalview.structure.StructureMapping; +import jalview.util.Comparison; +import jalview.util.DBRefUtils; import jalview.util.Format; import jalview.xml.binding.sifts.Entry; import jalview.xml.binding.sifts.Entry.Entity; @@ -36,49 +39,49 @@ import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; -import jalview.xml.binding.sifts.Entry.ListDB.Db; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.net.URL; import java.net.URLConnection; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.TreeMap; import java.util.zip.GZIPInputStream; import javax.xml.bind.JAXBContext; -import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; -import javax.xml.stream.FactoryConfigurationError; import javax.xml.stream.XMLInputFactory; -import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import MCview.Atom; import MCview.PDBChain; -import MCview.PDBfile; public class SiftsClient implements SiftsClientI { private Entry siftsEntry; - private PDBfile pdb; + private StructureFile pdb; private String pdbId; private String structId; - private String segStartEnd; - private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT; private static final int BUFFER_SIZE = 4096; @@ -89,15 +92,9 @@ public class SiftsClient implements SiftsClientI private static final int PDB_ATOM_POS = 1; - private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; - - public static final String DEFAULT_SIFTS_DOWNLOAD_DIR = System - .getProperty("user.home") - + File.separatorChar - + ".sifts_downloads" + File.separatorChar; + private static final String NOT_OBSERVED = "Not_Observed"; - public static final String SIFTS_DOWNLOAD_DIR = jalview.bin.Cache - .getDefault("sifts_download_dir", DEFAULT_SIFTS_DOWNLOAD_DIR); + private static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; private final static String NEWLINE = System.lineSeparator(); @@ -105,7 +102,7 @@ public class SiftsClient implements SiftsClientI private HashSet curDBRefAccessionIdsString; - public enum CoordinateSys + private enum CoordinateSys { UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe"); private String name; @@ -121,7 +118,7 @@ public class SiftsClient implements SiftsClientI } }; - public enum ResidueDetailType + private enum ResidueDetailType { NAME_SEC_STRUCTURE("nameSecondaryStructure"), CODE_SEC_STRUCTURE( "codeSecondaryStructure"), ANNOTATION("Annotation"); @@ -139,37 +136,21 @@ public class SiftsClient implements SiftsClientI }; /** - * Fetch SIFTs file for the given PDB Id and construct an instance of + * Fetch SIFTs file for the given PDBfile and construct an instance of * SiftsClient * * @param pdbId * @throws SiftsException */ - public SiftsClient(PDBfile pdb) throws SiftsException + public SiftsClient(StructureFile pdb) throws SiftsException { this.pdb = pdb; - this.pdbId = pdb.id; + this.pdbId = pdb.getId(); File siftsFile = getSiftsFile(pdbId); siftsEntry = parseSIFTs(siftsFile); } /** - * Construct an instance of SiftsClient using the supplied SIFTs file - the - * SIFTs file should correspond to the given PDB Id - * - * @param pdbId - * @param siftsFile - * @throws SiftsException - * @throws Exception - */ - public SiftsClient(PDBfile pdb, File siftsFile) throws SiftsException - { - this.pdb = pdb; - this.pdbId = pdb.id; - siftsEntry = parseSIFTs(siftsFile); - } - - /** * Parse the given SIFTs File and return a JAXB POJO of parsed data * * @param siftFile @@ -180,33 +161,16 @@ public class SiftsClient implements SiftsClientI */ private Entry parseSIFTs(File siftFile) throws SiftsException { - try + try (InputStream in = new FileInputStream(siftFile); + GZIPInputStream gzis = new GZIPInputStream(in);) { - System.out.println("File : " + siftFile.getAbsolutePath()); + // System.out.println("File : " + siftFile.getAbsolutePath()); JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts"); - InputStream in = new FileInputStream(siftFile); - GZIPInputStream gzis = new GZIPInputStream(in); XMLStreamReader streamReader = XMLInputFactory.newInstance() .createXMLStreamReader(gzis); Unmarshaller um = jc.createUnmarshaller(); return (Entry) um.unmarshal(streamReader); - } catch (JAXBException e) - { - e.printStackTrace(); - throw new SiftsException(e.getMessage()); - } catch (FileNotFoundException e) - { - e.printStackTrace(); - throw new SiftsException(e.getMessage()); - } catch (XMLStreamException e) - { - e.printStackTrace(); - throw new SiftsException(e.getMessage()); - } catch (FactoryConfigurationError e) - { - e.printStackTrace(); - throw new SiftsException(e.getMessage()); - } catch (IOException e) + } catch (Exception e) { e.printStackTrace(); throw new SiftsException(e.getMessage()); @@ -214,7 +178,8 @@ public class SiftsClient implements SiftsClientI } /** - * Get a SIFTs XML file for a given PDB Id + * Get a SIFTs XML file for a given PDB Id from Cache or download from FTP + * repository if not found in cache * * @param pdbId * @return SIFTs XML file @@ -222,58 +187,110 @@ public class SiftsClient implements SiftsClientI */ public static File getSiftsFile(String pdbId) throws SiftsException { - File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase() - + ".xml.gz"); + String siftsFileName = SiftsSettings.getSiftDownloadDirectory() + + pdbId.toLowerCase() + ".xml.gz"; + File siftsFile = new File(siftsFileName); if (siftsFile.exists()) { - // TODO it may be worth performing an age check to determine if a - // new SIFTs file should be re-downloaded as SIFTs entries are usually - // updated weekly + // The line below is required for unit testing... don't comment it out!!! System.out.println(">>> SIFTS File already downloaded for " + pdbId); - return siftsFile; + + if (isFileOlderThanThreshold(siftsFile, + SiftsSettings.getCacheThresholdInDays())) + { + File oldSiftsFile = new File(siftsFileName + "_old"); + siftsFile.renameTo(oldSiftsFile); + try + { + siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + oldSiftsFile.delete(); + return siftsFile; + } catch (IOException e) + { + e.printStackTrace(); + oldSiftsFile.renameTo(siftsFile); + return new File(siftsFileName); + } + } + } + try + { + siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + } catch (IOException e) + { + throw new SiftsException(e.getMessage()); } - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); return siftsFile; } /** - * Download a SIFTs XML file for a given PDB Id + * This method enables checking if a cached file has exceeded a certain + * threshold(in days) + * + * @param file + * the cached file + * @param noOfDays + * the threshold in days + * @return + */ + public static boolean isFileOlderThanThreshold(File file, int noOfDays) + { + Path filePath = file.toPath(); + BasicFileAttributes attr; + int diffInDays = 0; + try + { + attr = Files.readAttributes(filePath, BasicFileAttributes.class); + diffInDays = (int) ((new Date().getTime() - attr.lastModifiedTime() + .toMillis()) / (1000 * 60 * 60 * 24)); + // System.out.println("Diff in days : " + diffInDays); + } catch (IOException e) + { + e.printStackTrace(); + } + return noOfDays <= diffInDays; + } + + /** + * Download a SIFTs XML file for a given PDB Id from an FTP repository * * @param pdbId * @return downloaded SIFTs XML file * @throws SiftsException + * @throws IOException */ - public static File downloadSiftsFile(String pdbId) throws SiftsException + public static File downloadSiftsFile(String pdbId) throws SiftsException, + IOException { + if (pdbId.contains(".cif")) + { + pdbId = pdbId.replace(".cif", ""); + } String siftFile = pdbId + ".xml.gz"; String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; - String downloadedSiftsFile = SIFTS_DOWNLOAD_DIR + siftFile; - File siftsDownloadDir = new File(SIFTS_DOWNLOAD_DIR); + String downloadedSiftsFile = SiftsSettings.getSiftDownloadDirectory() + + siftFile; + File siftsDownloadDir = new File( + SiftsSettings.getSiftDownloadDirectory()); if (!siftsDownloadDir.exists()) { siftsDownloadDir.mkdirs(); } - try + // System.out.println(">> Download ftp url : " + siftsFileFTPURL); + URL url = new URL(siftsFileFTPURL); + URLConnection conn = url.openConnection(); + InputStream inputStream = conn.getInputStream(); + FileOutputStream outputStream = new FileOutputStream( + downloadedSiftsFile); + byte[] buffer = new byte[BUFFER_SIZE]; + int bytesRead = -1; + while ((bytesRead = inputStream.read(buffer)) != -1) { - System.out.println(">> Download ftp url : " + siftsFileFTPURL); - URL url = new URL(siftsFileFTPURL); - URLConnection conn = url.openConnection(); - InputStream inputStream = conn.getInputStream(); - FileOutputStream outputStream = new FileOutputStream( - downloadedSiftsFile); - byte[] buffer = new byte[BUFFER_SIZE]; - int bytesRead = -1; - while ((bytesRead = inputStream.read(buffer)) != -1) - { - outputStream.write(buffer, 0, bytesRead); - } - outputStream.close(); - inputStream.close(); - System.out.println(">>> File downloaded : " + downloadedSiftsFile); - } catch (IOException ex) - { - throw new SiftsException(ex.getMessage()); + outputStream.write(buffer, 0, bytesRead); } + outputStream.close(); + inputStream.close(); + // System.out.println(">>> File downloaded : " + downloadedSiftsFile); return new File(downloadedSiftsFile); } @@ -286,8 +303,8 @@ public class SiftsClient implements SiftsClientI */ public static boolean deleteSiftsFileByPDBId(String pdbId) { - File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase() - + ".xml.gz"); + File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() + + pdbId.toLowerCase() + ".xml.gz"); if (siftsFile.exists()) { return siftsFile.delete(); @@ -295,7 +312,6 @@ public class SiftsClient implements SiftsClientI return true; } - /** * Get a valid SIFTs DBRef for the given sequence current SIFTs entry * @@ -308,60 +324,41 @@ public class SiftsClient implements SiftsClientI public DBRefEntryI getValidSourceDBRef(SequenceI seq) throws SiftsException { - DBRefEntryI sourceDBRef = null; - sourceDBRef = seq.getSourceDBRef(); - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) + List dbRefs = seq.getPrimaryDBRefs(); + if (dbRefs == null || dbRefs.size() < 1) { - return sourceDBRef; + throw new SiftsException( + "Source DBRef could not be determined. DBRefs might not have been retrieved."); } - else + + for (DBRefEntry dbRef : dbRefs) { - DBRefEntry[] dbRefs = seq.getDBRefs(); - if (dbRefs == null || dbRefs.length < 1) + if (dbRef == null || dbRef.getAccessionId() == null + || dbRef.getSource() == null) { - final SequenceI[] seqs = new SequenceI[] { seq }; - new jalview.ws.DBRefFetcher(seqs, null, null, null, false) - .fetchDBRefs(true); - dbRefs = seq.getDBRefs(); + continue; } - - if (dbRefs == null || dbRefs.length < 1) + String canonicalSource = DBRefUtils.getCanonicalName(dbRef + .getSource()); + if (isValidDBRefEntry(dbRef) + && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT) || canonicalSource + .equalsIgnoreCase(DBRefSource.PDB))) { - throw new SiftsException("Could not get source DB Ref"); - } - - for (DBRefEntryI dbRef : dbRefs) - { - if (dbRef == null || dbRef.getAccessionId() == null - || dbRef.getSource() == null) - { - continue; - } - if (isFoundInSiftsEntry(dbRef.getAccessionId()) - && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef - .getSource().equalsIgnoreCase(DBRefSource.PDB))) - { - return dbRef; - } + return dbRef; } } - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) - { - return sourceDBRef; - } throw new SiftsException("Could not get source DB Ref"); } - /** - * Check that the DBRef Entry is properly populated and is available in the - * instantiated SIFTs Entry + * Check that the DBRef Entry is properly populated and is available in this + * SiftClient instance * * @param entry * - DBRefEntry to validate * @return true validation is successful otherwise false is returned. */ - private boolean isValidDBRefEntry(DBRefEntryI entry) + boolean isValidDBRefEntry(DBRefEntryI entry) { return entry != null && entry.getAccessionId() != null && isFoundInSiftsEntry(entry.getAccessionId()); @@ -381,7 +378,8 @@ public class SiftsClient implements SiftsClientI .getMapRegion(); for (MapRegion mapRegion : mapRegions) { - accessions.add(mapRegion.getDb().getDbAccessionId()); + accessions + .add(mapRegion.getDb().getDbAccessionId().toLowerCase()); } } } @@ -393,8 +391,8 @@ public class SiftsClient implements SiftsClientI String pdbFile, String chain) throws SiftsException { structId = (chain == null) ? pdbId : pdbId + "|" + chain; - System.out.println("Getting mapping for: " + pdbId + "|" + chain - + " : seq- " + seq.getName()); + System.out.println("Getting SIFTS mapping for " + structId + ": seq " + + seq.getName()); final StringBuilder mappingDetails = new StringBuilder(128); PrintStream ps = new PrintStream(System.out) @@ -411,34 +409,30 @@ public class SiftsClient implements SiftsClientI mappingDetails.append(NEWLINE); } }; - int[][] mapping = getGreedyMapping(chain, seq, ps); + HashMap mapping = getGreedyMapping(chain, seq, ps); String mappingOutput = mappingDetails.toString(); StructureMapping siftsMapping = new StructureMapping(seq, pdbFile, - pdbId, chain, mapping, - mappingOutput); + pdbId, chain, mapping, mappingOutput); return siftsMapping; } @Override - public int[][] getGreedyMapping(String entityId, SequenceI seq, - java.io.PrintStream os) - throws SiftsException + public HashMap getGreedyMapping(String entityId, + SequenceI seq, java.io.PrintStream os) throws SiftsException { - System.out.println("Generating mappings for : " + entityId); + List omitNonObserved = new ArrayList(); + int nonObservedShiftIndex = 0; + // System.out.println("Generating mappings for : " + entityId); Entity entity = null; entity = getEntityById(entityId); String originalSeq = AlignSeq.extractGaps( - jalview.util.Comparison.GapChars, - seq.getSequenceAsString()); - int mapping[][] = new int[originalSeq.length() + seq.getStart()][2]; - DBRefEntryI sourceDBRef = seq.getSourceDBRef(); - if (sourceDBRef == null) - { - sourceDBRef = getValidSourceDBRef(seq); - // TODO ensure sequence start/end is in the same coordinate system and - // consistent with the choosen sourceDBRef - } + jalview.util.Comparison.GapChars, seq.getSequenceAsString()); + HashMap mapping = new HashMap(); + DBRefEntryI sourceDBRef; + sourceDBRef = getValidSourceDBRef(seq); + // TODO ensure sequence start/end is in the same coordinate system and + // consistent with the choosen sourceDBRef // set sequence coordinate system - default value is UniProt if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB)) @@ -456,19 +450,95 @@ public class SiftsClient implements SiftsClientI curDBRefAccessionIdsString = dbRefAccessionIdsString; curSourceDBRef = sourceDBRef.getAccessionId(); - // initialise all mapping positions to unassigned - for (int residuePos[] : mapping) - { - residuePos[PDB_RES_POS] = UNASSIGNED; - residuePos[PDB_ATOM_POS] = UNASSIGNED; - } TreeMap resNumMap = new TreeMap(); List segments = entity.getSegment(); + SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap, + omitNonObserved, nonObservedShiftIndex); + processSegments(segments, shp); + try + { + populateAtomPositions(entityId, mapping); + } catch (Exception e) + { + e.printStackTrace(); + } + if (seqCoordSys == CoordinateSys.UNIPROT) + { + padWithGaps(resNumMap, omitNonObserved); + } + int seqStart = UNASSIGNED; + int seqEnd = UNASSIGNED; + int pdbStart = UNASSIGNED; + int pdbEnd = UNASSIGNED; + + if (mapping.isEmpty()) + { + throw new SiftsException("SIFTS mapping failed"); + } + + Integer[] keys = mapping.keySet().toArray(new Integer[0]); + Arrays.sort(keys); + seqStart = keys[0]; + seqEnd = keys[keys.length - 1]; + + String matchedSeq = originalSeq; + if (seqStart != UNASSIGNED) + { + pdbStart = mapping.get(seqStart)[PDB_RES_POS]; + pdbEnd = mapping.get(seqEnd)[PDB_RES_POS]; + int orignalSeqStart = seq.getStart(); + if (orignalSeqStart >= 1) + { + int subSeqStart = (seqStart >= orignalSeqStart) ? seqStart + - orignalSeqStart : 0; + int subSeqEnd = seqEnd - (orignalSeqStart - 1); + subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length() + : subSeqEnd; + matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd); + } + else + { + matchedSeq = originalSeq.substring(1, originalSeq.length()); + } + } + + StringBuilder targetStrucSeqs = new StringBuilder(); + for (String res : resNumMap.values()) + { + targetStrucSeqs.append(res); + } + + if (os != null) + { + MappingOutputPojo mop = new MappingOutputPojo(); + mop.setSeqStart(seqStart); + mop.setSeqEnd(seqEnd); + mop.setSeqName(seq.getName()); + mop.setSeqResidue(matchedSeq); + + mop.setStrStart(pdbStart); + mop.setStrEnd(pdbEnd); + mop.setStrName(structId); + mop.setStrResidue(targetStrucSeqs.toString()); + + mop.setType("pep"); + os.print(getMappingOutput(mop).toString()); + os.println(); + } + return mapping; + } + + void processSegments(List segments, SegmentHelperPojo shp) + { + SequenceI seq = shp.getSeq(); + HashMap mapping = shp.getMapping(); + TreeMap resNumMap = shp.getResNumMap(); + List omitNonObserved = shp.getOmitNonObserved(); + int nonObservedShiftIndex = shp.getNonObservedShiftIndex(); for (Segment segment : segments) { - segStartEnd = segment.getStart() + " - " + segment.getEnd(); - System.out.println("Mappging segments : " + segment.getSegId() + "\\" - + segStartEnd); + // System.out.println("Mapping segments : " + segment.getSegId() + "\\"s + // + segStartEnd); List residues = segment.getListResidue().getResidue(); for (Residue residue : residues) { @@ -483,12 +553,20 @@ public class SiftsClient implements SiftsClientI } if (cRefDb.getDbCoordSys() .equalsIgnoreCase(seqCoordSys.getName()) - && hasAccessionId(cRefDb.getDbAccessionId())) + && isAccessionMatched(cRefDb.getDbAccessionId())) { String resNumIndexString = cRefDb.getDbResNum() .equalsIgnoreCase("None") ? String.valueOf(UNASSIGNED) : cRefDb.getDbResNum(); - currSeqIndex = Integer.valueOf(resNumIndexString); + try + { + currSeqIndex = Integer.valueOf(resNumIndexString); + } catch (NumberFormatException nfe) + { + currSeqIndex = Integer.valueOf(resNumIndexString + .split("[a-zA-Z]")[0]); + continue; + } if (pdbRefDb != null) { break;// exit loop if pdb and uniprot are already found @@ -499,252 +577,363 @@ public class SiftsClient implements SiftsClientI { continue; } - if (currSeqIndex > seq.getStart() && currSeqIndex <= seq.getEnd()) + if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd()) { int resNum; try { resNum = (pdbRefDb == null) ? Integer.valueOf(residue - .getDbResNum()) : Integer.valueOf(pdbRefDb.getDbResNum()); + .getDbResNum()) : Integer.valueOf(pdbRefDb + .getDbResNum()); } catch (NumberFormatException nfe) { resNum = (pdbRefDb == null) ? Integer.valueOf(residue .getDbResNum()) : Integer.valueOf(pdbRefDb .getDbResNum().split("[a-zA-Z]")[0]); + continue; } - try + + if (isResidueObserved(residue) + || seqCoordSys == CoordinateSys.UNIPROT) { - mapping[currSeqIndex][PDB_RES_POS] = Integer - .valueOf(resNum); - } catch (ArrayIndexOutOfBoundsException e) + char resCharCode = ResidueProperties + .getSingleCharacterCode(ResidueProperties + .getCanonicalAminoAcid(residue.getDbResName())); + resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); + } + else { - // do nothing.. + omitNonObserved.add(currSeqIndex); + ++nonObservedShiftIndex; } - char resCharCode = ResidueProperties - .getSingleCharacterCode(residue.getDbResName()); - resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); + mapping.put(currSeqIndex - nonObservedShiftIndex, new int[] { + Integer.valueOf(resNum), UNASSIGNED }); } } } + } + + /** + * + * @param chainId + * Target chain to populate mapping of its atom positions. + * @param mapping + * Two dimension array of residue index versus atom position + * @throws IllegalArgumentException + * Thrown if chainId or mapping is null + * @throws SiftsException + */ + void populateAtomPositions(String chainId, Map mapping) + throws IllegalArgumentException, SiftsException + { try { - populateAtomPositions(entityId, mapping); - } catch (Exception e) - { - e.printStackTrace(); - } - padWithGaps(resNumMap); - int counter = 0; - int seqStart = UNASSIGNED; - int seqEnd = UNASSIGNED; - int pdbStart = UNASSIGNED; - int pdbEnd = UNASSIGNED; - boolean startDetected = false; - for (int[] x : mapping) - { - if (!startDetected && x[PDB_RES_POS] != UNASSIGNED) - { - seqStart = counter; - startDetected = true; - // System.out.println("Seq start: "+ seqStart); - } + PDBChain chain = pdb.findChain(chainId); - if (startDetected && x[PDB_RES_POS] != UNASSIGNED) + if (chain == null || mapping == null) { - seqEnd = counter; + throw new IllegalArgumentException( + "Chain id or mapping must not be null."); } - ++counter; - } - - String matchedSeq = originalSeq; - if (seqStart != UNASSIGNED) - { - seqEnd = (seqEnd == UNASSIGNED) ? counter : seqEnd; - pdbStart = mapping[seqStart][PDB_RES_POS]; - pdbEnd = mapping[seqEnd][PDB_RES_POS]; - int orignalSeqStart = seq.getStart(); - if (orignalSeqStart >= 1) + for (int[] map : mapping.values()) { - int subSeqStart = seqStart - orignalSeqStart; - int subSeqEnd = seqEnd - (orignalSeqStart - 1); - matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd); + if (map[PDB_RES_POS] != UNASSIGNED) + { + map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); + } } + } catch (NullPointerException e) + { + throw new SiftsException(e.getMessage()); + } catch (Exception e) + { + throw new SiftsException(e.getMessage()); } + } - StringBuilder targetStrucSeqs = new StringBuilder(); - for (String res : resNumMap.values()) + /** + * + * @param residueIndex + * The residue index used for the search + * @param atoms + * A collection of Atom to search + * @return atom position for the given residue index + */ + int getAtomIndex(int residueIndex, Collection atoms) + { + if (atoms == null) { - targetStrucSeqs.append(res); + throw new IllegalArgumentException( + "atoms collection must not be null!"); } - - if (os != null) + for (Atom atom : atoms) { - MappingOutputPojo mop = new MappingOutputPojo(); - mop.setSeqStart(seqStart); - mop.setSeqEnd(seqEnd); - mop.setSeqName(seq.getName()); - mop.setSeqResidue(matchedSeq); - - mop.setStrStart(pdbStart); - mop.setStrEnd(pdbEnd); - mop.setStrName(structId); - mop.setStrResidue(targetStrucSeqs.toString()); - - mop.setType("pep"); - os.print(getMappingOutput(mop).toString()); + if (atom.resNumber == residueIndex) + { + return atom.atomIndex; + } } - return mapping; + return UNASSIGNED; } + /** + * Checks if the residue instance is marked 'Not_observed' or not + * + * @param residue + * @return + */ private boolean isResidueObserved(Residue residue) { - String annotation = getResidueAnnotaiton(residue, + Set annotations = getResidueAnnotaitons(residue, ResidueDetailType.ANNOTATION); - if (annotation == null) + if (annotations == null || annotations.isEmpty()) { return true; } - if (!annotation.equalsIgnoreCase("Not_Found") - && annotation.equalsIgnoreCase("Not_Observed")) + for (String annotation : annotations) { - return false; + if (annotation.equalsIgnoreCase(NOT_OBSERVED)) + { + return false; + } } return true; } - private String getResidueAnnotaiton(Residue residue, + /** + * Get annotation String for a given residue and annotation type + * + * @param residue + * @param type + * @return + */ + private Set getResidueAnnotaitons(Residue residue, ResidueDetailType type) { + HashSet foundAnnotations = new HashSet(); List resDetails = residue.getResidueDetail(); for (ResidueDetail resDetail : resDetails) { if (resDetail.getProperty().equalsIgnoreCase(type.getCode())) { - return resDetail.getContent(); + foundAnnotations.add(resDetail.getContent()); } } - return "Not_Found"; + return foundAnnotations; } - private boolean hasAccessionId(String accession) + @Override + public boolean isAccessionMatched(String accession) { boolean isStrictMatch = true; return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession) : curDBRefAccessionIdsString.contains(accession.toLowerCase()); } - @Override - public boolean isFoundInSiftsEntry(String accessionId) + private boolean isFoundInSiftsEntry(String accessionId) { + Set siftsDBRefs = getAllMappingAccession(); return accessionId != null - && getAllMappingAccession().contains(accessionId); + && siftsDBRefs.contains(accessionId.toLowerCase()); } /** - * Pads missing positions with gaps + * Pad omitted residue positions in PDB sequence with gaps * * @param resNumMap */ - void padWithGaps(TreeMap resNumMap) + void padWithGaps(Map resNumMap, + List omitNonObserved) { if (resNumMap == null || resNumMap.isEmpty()) { return; } Integer[] keys = resNumMap.keySet().toArray(new Integer[0]); - Arrays.sort(keys); + // Arrays.sort(keys); int firstIndex = keys[0]; int lastIndex = keys[keys.length - 1]; - System.out.println("Min value " + firstIndex); - System.out.println("Max value " + lastIndex); + // System.out.println("Min value " + firstIndex); + // System.out.println("Max value " + lastIndex); for (int x = firstIndex; x <= lastIndex; x++) { - if (!resNumMap.containsKey(x)) + if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x)) { resNumMap.put(x, "-"); } } } - /** - * - * @param chainId - * Target chain to populate mapping of its atom positions. - * @param mapping - * Two dimension array of residue index versus atom position - * @throws IllegalArgumentException - * Thrown if chainId or mapping is null - */ - void populateAtomPositions(String chainId, int[][] mapping) - throws IllegalArgumentException + @Override + public Entity getEntityById(String id) throws SiftsException { - PDBChain chain = pdb.findChain(chainId); - if (chain == null || mapping == null) + // Determines an entity to process by performing a heuristic matching of all + // Entities with the given chainId and choosing the best matching Entity + Entity entity = getEntityByMostOptimalMatchedId(id); + if (entity != null) { - throw new IllegalArgumentException( - "Chain id or mapping must not be null."); - } - for (int[] map : mapping) - { - if (map[PDB_RES_POS] != UNASSIGNED) - { - map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); - } + return entity; } + throw new SiftsException("Entity " + id + " not found"); } /** + * This method was added because EntityId is NOT always equal to ChainId. + * Hence, it provides the logic to greedily detect the "true" Entity for a + * given chainId where discrepancies exist. * - * @param residueIndex - * The residue index used for the search - * @param atoms - * A collection of Atom to search - * @return atom position for the given residue index + * @param chainId + * @return */ - int getAtomIndex(int residueIndex, Collection atoms) + public Entity getEntityByMostOptimalMatchedId(String chainId) { - if (atoms == null) + // System.out.println("---> advanced greedy entityId matching block entered.."); + List entities = siftsEntry.getEntity(); + SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()]; + int count = 0; + for (Entity entity : entities) { - throw new IllegalArgumentException( - "atoms collection must not be null!"); + sPojo[count] = new SiftsEntitySortPojo(); + sPojo[count].entityId = entity.getEntityId(); + + List segments = entity.getSegment(); + for (Segment segment : segments) + { + List residues = segment.getListResidue().getResidue(); + for (Residue residue : residues) + { + List cRefDbs = residue.getCrossRefDb(); + for (CrossRefDb cRefDb : cRefDbs) + { + if (!cRefDb.getDbSource().equalsIgnoreCase("PDB")) + { + continue; + } + ++sPojo[count].resCount; + if (cRefDb.getDbChainId().equalsIgnoreCase(chainId)) + { + ++sPojo[count].chainIdFreq; + } + } + } + } + sPojo[count].pid = (100 * sPojo[count].chainIdFreq) + / sPojo[count].resCount; + ++count; } - for (Atom atom : atoms) + Arrays.sort(sPojo, Collections.reverseOrder()); + // System.out.println("highest matched entity : " + sPojo[0].entityId); + // System.out.println("highest matched pid : " + sPojo[0].pid); + + if (sPojo[0].entityId != null) { - if (atom.resNumber == residueIndex) + if (sPojo[0].pid < 1) { - return atom.atomIndex; + return null; + } + for (Entity entity : entities) + { + if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId)) + { + continue; + } + return entity; } } - return UNASSIGNED; + return null; } - @Override - public Entity getEntityById(String id) throws SiftsException + private class SiftsEntitySortPojo implements + Comparable { - List entities = siftsEntry.getEntity(); - for (Entity entity : entities) + public String entityId; + + public int chainIdFreq; + + public int pid; + + public int resCount; + + @Override + public int compareTo(SiftsEntitySortPojo o) { - if (!entity.getEntityId().equalsIgnoreCase(id)) - { - continue; - } - return entity; + return this.pid - o.pid; } - throw new SiftsException("Entity " + id + " not found"); } - @Override - public String[] getEntryDBs() + private class SegmentHelperPojo { - System.out.println("\nListing DB entries..."); - List availDbs = new ArrayList(); - List dbs = siftsEntry.getListDB().getDb(); - for (Db db : dbs) + private SequenceI seq; + + private HashMap mapping; + + private TreeMap resNumMap; + + private List omitNonObserved; + + private int nonObservedShiftIndex; + + public SegmentHelperPojo(SequenceI seq, + HashMap mapping, + TreeMap resNumMap, + List omitNonObserved, int nonObservedShiftIndex) + { + setSeq(seq); + setMapping(mapping); + setResNumMap(resNumMap); + setOmitNonObserved(omitNonObserved); + setNonObservedShiftIndex(nonObservedShiftIndex); + } + + public SequenceI getSeq() + { + return seq; + } + + public void setSeq(SequenceI seq) + { + this.seq = seq; + } + + public HashMap getMapping() + { + return mapping; + } + + public void setMapping(HashMap mapping) + { + this.mapping = mapping; + } + + public TreeMap getResNumMap() { - availDbs.add(db.getDbSource()); - System.out.println(db.getDbSource() + " | " + db.getDbCoordSys()); + return resNumMap; + } + + public void setResNumMap(TreeMap resNumMap) + { + this.resNumMap = resNumMap; + } + + public List getOmitNonObserved() + { + return omitNonObserved; + } + + public void setOmitNonObserved(List omitNonObserved) + { + this.omitNonObserved = omitNonObserved; + } + + public int getNonObservedShiftIndex() + { + return nonObservedShiftIndex; + } + + public void setNonObservedShiftIndex(int nonObservedShiftIndex) + { + this.nonObservedShiftIndex = nonObservedShiftIndex; } - return availDbs.toArray(new String[0]); } @Override @@ -760,9 +949,9 @@ public class SiftsClient implements SiftsClientI String strName = mp.getStrName(); int pdbStart = mp.getStrStart(); int pdbEnd = mp.getStrEnd(); - + String type = mp.getType(); - + int maxid = (seqName.length() >= strName.length()) ? seqName.length() : strName.length(); int len = 72 - maxid - 1; @@ -772,7 +961,8 @@ public class SiftsClient implements SiftsClientI // output mappings StringBuffer output = new StringBuffer(); output.append(NEWLINE); - output.append("Sequence ⟷ Structure mapping details").append(NEWLINE); + output.append("Sequence \u27f7 Structure mapping details").append( + NEWLINE); output.append("Method: SIFTS"); output.append(NEWLINE).append(NEWLINE); @@ -789,7 +979,7 @@ public class SiftsClient implements SiftsClientI output.append(" - "); output.append(String.valueOf(pdbEnd)); output.append(NEWLINE).append(NEWLINE); - + int matchedSeqCount = 0; for (int j = 0; j < nochunks; j++) { @@ -813,32 +1003,35 @@ public class SiftsClient implements SiftsClientI { try { - if ((i + (j * len)) < seqRes.length()) - { - if (seqRes.charAt(i + (j * len)) == strRes.charAt(i + (j * len)) - && !jalview.util.Comparison.isGap(seqRes.charAt(i - + (j * len)))) + if ((i + (j * len)) < seqRes.length()) { + boolean sameChar = Comparison.isSameResidue( + seqRes.charAt(i + (j * len)), + strRes.charAt(i + (j * len)), false); + if (sameChar + && !jalview.util.Comparison.isGap(seqRes.charAt(i + + (j * len)))) + { matchedSeqCount++; - output.append("|"); - } - else if (type.equals("pep")) - { - if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)), - strRes.charAt(i + (j * len))) > 0) + output.append("|"); + } + else if (type.equals("pep")) { - output.append("."); + if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)), + strRes.charAt(i + (j * len))) > 0) + { + output.append("."); + } + else + { + output.append(" "); + } } else { output.append(" "); } } - else - { - output.append(" "); - } - } } catch (IndexOutOfBoundsException e) { continue; @@ -858,17 +1051,16 @@ public class SiftsClient implements SiftsClientI output.append(NEWLINE).append(NEWLINE); } float pid = (float) matchedSeqCount / seqRes.length() * 100; - if (pid < 2) + if (pid < SiftsSettings.getFailSafePIDThreshold()) { - throw new SiftsException("Low PID detected for SIFTs mapping..."); + throw new SiftsException(">>> Low PID detected for SIFTs mapping..."); } - output.append("Length of alignment = " + seqRes.length()) - .append(NEWLINE); + output.append("Length of alignment = " + seqRes.length()).append( + NEWLINE); output.append(new Format("Percentage ID = %2.2f").form(pid)); - output.append(NEWLINE); return output; } - + @Override public int getEntityCount() { @@ -888,12 +1080,6 @@ public class SiftsClient implements SiftsClientI } @Override - public String getDbEvidence() - { - return siftsEntry.getDbEvidence(); - } - - @Override public String getDbSource() { return siftsEntry.getDbSource(); @@ -904,4 +1090,5 @@ public class SiftsClient implements SiftsClientI { return siftsEntry.getDbVersion(); } + }