X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=4fb9ca92ba97f36a4af5ad40d1b398f5c56b37a9;hb=9c7418155500b39b96d2d6f92d9ce12012b58622;hp=bde215da2a187639c968ad1946da67e1333c5905;hpb=89f62b210bafac2a3341ce3fa9a360d5ea6c1c3c;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index bde215d..4fb9ca9 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -18,104 +18,155 @@ * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ -package jalview.io; - -import jalview.analysis.AlignSeq; -import jalview.api.DBRefEntryI; -import jalview.api.SiftsClientI; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.SequenceI; -import jalview.schemes.ResidueProperties; -import jalview.structure.StructureMapping; -import jalview.util.Format; -import jalview.xml.binding.sifts.Entry; -import jalview.xml.binding.sifts.Entry.Entity; -import jalview.xml.binding.sifts.Entry.Entity.Segment; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; -import jalview.xml.binding.sifts.Entry.EntryDetail; -import jalview.xml.binding.sifts.Entry.ListDB.Db; +package jalview.ws.sifts; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.net.URL; import java.net.URLConnection; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; import java.util.zip.GZIPInputStream; import javax.xml.bind.JAXBContext; -import javax.xml.bind.JAXBException; +import javax.xml.bind.JAXBElement; import javax.xml.bind.Unmarshaller; -import javax.xml.stream.FactoryConfigurationError; import javax.xml.stream.XMLInputFactory; -import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; +import jalview.analysis.AlignSeq; +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.api.DBRefEntryI; +import jalview.api.SiftsClientI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceI; +import jalview.io.StructureFile; +import jalview.schemes.ResidueProperties; +import jalview.structure.StructureMapping; +import jalview.util.Comparison; +import jalview.util.DBRefUtils; +import jalview.util.Format; +import jalview.util.Platform; +import jalview.xml.binding.sifts.Entry; +import jalview.xml.binding.sifts.Entry.Entity; +import jalview.xml.binding.sifts.Entry.Entity.Segment; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; +import mc_view.Atom; +import mc_view.PDBChain; + public class SiftsClient implements SiftsClientI { + /* + * for use in mocking out file fetch for tests only + * - reset to null after testing! + */ + private static File mockSiftsFile; + private Entry siftsEntry; + private StructureFile pdb; + private String pdbId; + private String structId; + + private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT; + + /** + * PDB sequence position to sequence coordinate mapping as derived from SIFTS + * record for the identified SeqCoordSys Used for lift-over from sequence + * derived from PDB (with first extracted PDBRESNUM as 'start' to the sequence + * being annotated with PDB data + */ + private jalview.datamodel.Mapping seqFromPdbMapping; + private static final int BUFFER_SIZE = 4096; - private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; + public static final int UNASSIGNED = Integer.MIN_VALUE; + + private static final int PDB_RES_POS = 0; + + private static final int PDB_ATOM_POS = 1; - public static final String DEFAULT_SIFTS_DOWNLOAD_DIR = System - .getProperty("user.home") - + File.separatorChar - + ".sifts_downloads" + File.separatorChar; + private static final int PDBE_POS = 2; - public static final String SIFTS_DOWNLOAD_DIR = jalview.bin.Cache - .getDefault("sifts_download_dir", DEFAULT_SIFTS_DOWNLOAD_DIR); + private static final String NOT_OBSERVED = "Not_Observed"; + + private static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; private final static String NEWLINE = System.lineSeparator(); - /** - * Fetch SIFTs file for the given PDB Id and construct an instance of - * SiftsClient - * - * @param pdbId - */ - public SiftsClient(String pdbId) + private String curSourceDBRef; + + private HashSet curDBRefAccessionIdsString; + + private enum CoordinateSys { - this.pdbId = pdbId; - try + UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe"); + private String name; + + private CoordinateSys(String name) { - File siftsFile = getSiftsFile(pdbId); - siftsEntry = parseSIFTs(siftsFile); - } catch (Exception e) + this.name = name; + } + + public String getName() { - e.printStackTrace(); + return name; } - } + }; - /** - * Construct an instance of SiftsClient using the supplied SIFTs file - - * the SIFTs file should correspond to the given PDB Id - * - * @param pdbId - * @param siftsFile - */ - public SiftsClient(String pdbId, File siftsFile) + private enum ResidueDetailType { - this.pdbId = pdbId; - try + NAME_SEC_STRUCTURE("nameSecondaryStructure"), + CODE_SEC_STRUCTURE("codeSecondaryStructure"), ANNOTATION("Annotation"); + private String code; + + private ResidueDetailType(String code) { - siftsEntry = parseSIFTs(siftsFile); - } catch (Exception e) + this.code = code; + } + + public String getCode() { - e.printStackTrace(); + return code; } + }; + /** + * Fetch SIFTs file for the given PDBfile and construct an instance of + * SiftsClient + * + * @param pdbId + * @throws SiftsException + */ + public SiftsClient(StructureFile pdb) throws SiftsException + { + this.pdb = pdb; + this.pdbId = pdb.getId(); + File siftsFile = getSiftsFile(pdbId); + siftsEntry = parseSIFTs(siftsFile); } /** @@ -127,97 +178,170 @@ public class SiftsClient implements SiftsClientI * @throws Exception * if a problem occurs while parsing the SIFTs XML */ - private Entry parseSIFTs(File siftFile) throws Exception + private Entry parseSIFTs(File siftFile) throws SiftsException { - try + try (InputStream in = new FileInputStream(siftFile); + GZIPInputStream gzis = new GZIPInputStream(in);) { - System.out.println("File : " + siftFile.getAbsolutePath()); + // System.out.println("File : " + siftFile.getAbsolutePath()); JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts"); - InputStream in = new FileInputStream(siftFile); - GZIPInputStream gzis = new GZIPInputStream(in); XMLStreamReader streamReader = XMLInputFactory.newInstance() .createXMLStreamReader(gzis); Unmarshaller um = jc.createUnmarshaller(); - return (Entry) um.unmarshal(streamReader); - } catch (JAXBException e) - { - e.printStackTrace(); - } catch (FileNotFoundException e) - { - e.printStackTrace(); - } catch (XMLStreamException e) - { - e.printStackTrace(); - } catch (FactoryConfigurationError e) - { - e.printStackTrace(); - } catch (IOException e) + JAXBElement jbe = um.unmarshal(streamReader, Entry.class); + return jbe.getValue(); + } catch (Exception e) { e.printStackTrace(); + throw new SiftsException(e.getMessage()); } - throw new Exception("Error parsing siftFile"); } /** - * Get a SIFTs XML file for a given PDB Id + * Get a SIFTs XML file for a given PDB Id from Cache or download from FTP + * repository if not found in cache * * @param pdbId * @return SIFTs XML file + * @throws SiftsException */ - public static File getSiftsFile(String pdbId) + public static File getSiftsFile(String pdbId) throws SiftsException { - File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase() - + ".xml.gz"); + /* + * return mocked file if it has been set + */ + if (mockSiftsFile != null) + { + return mockSiftsFile; + } + + String siftsFileName = SiftsSettings.getSiftDownloadDirectory() + + pdbId.toLowerCase() + ".xml.gz"; + File siftsFile = new File(siftsFileName); if (siftsFile.exists()) { - // TODO it may be worth performing a timestamp age check to determine if a - // new SIFTs file should be re-downloaded as SIFTs entries are usually - // updated weekly + // The line below is required for unit testing... don't comment it out!!! System.out.println(">>> SIFTS File already downloaded for " + pdbId); - return siftsFile; + + if (isFileOlderThanThreshold(siftsFile, + SiftsSettings.getCacheThresholdInDays())) + { + File oldSiftsFile = new File(siftsFileName + "_old"); + siftsFile.renameTo(oldSiftsFile); + try + { + siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + oldSiftsFile.delete(); + return siftsFile; + } catch (IOException e) + { + e.printStackTrace(); + oldSiftsFile.renameTo(siftsFile); + return new File(siftsFileName); + } + } + else + { + return siftsFile; + } + } + try + { + siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + } catch (IOException e) + { + throw new SiftsException(e.getMessage()); } - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); return siftsFile; } /** - * Download a SIFTs XML file for a given PDB Id + * This method enables checking if a cached file has exceeded a certain + * threshold(in days) + * + * @param file + * the cached file + * @param noOfDays + * the threshold in days + * @return + */ + public static boolean isFileOlderThanThreshold(File file, int noOfDays) + { + Path filePath = file.toPath(); + BasicFileAttributes attr; + int diffInDays = 0; + try + { + attr = Files.readAttributes(filePath, BasicFileAttributes.class); + diffInDays = (int) ((new Date().getTime() + - attr.lastModifiedTime().toMillis()) + / (1000 * 60 * 60 * 24)); + // System.out.println("Diff in days : " + diffInDays); + } catch (IOException e) + { + e.printStackTrace(); + } + return noOfDays <= diffInDays; + } + + /** + * Download a SIFTs XML file for a given PDB Id from an FTP repository * * @param pdbId * @return downloaded SIFTs XML file + * @throws SiftsException + * @throws IOException */ public static File downloadSiftsFile(String pdbId) + throws SiftsException, IOException { + if (pdbId.contains(".cif")) + { + pdbId = pdbId.replace(".cif", ""); + } String siftFile = pdbId + ".xml.gz"; String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; - String downloadedSiftsFile = SIFTS_DOWNLOAD_DIR + siftFile; - File siftsDownloadDir = new File(SIFTS_DOWNLOAD_DIR); - if (!siftsDownloadDir.exists()) + + /* + * Download the file from URL to either + * Java: directory of cached downloaded SIFTS files + * Javascript: temporary 'file' (in-memory cache) + */ + File downloadTo = null; + if (Platform.isJS()) { - siftsDownloadDir.mkdirs(); + downloadTo = File.createTempFile(siftFile, ".xml.gz"); } - try + else { - System.out.println(">> Download ftp url : " + siftsFileFTPURL); - URL url = new URL(siftsFileFTPURL); - URLConnection conn = url.openConnection(); - InputStream inputStream = conn.getInputStream(); - FileOutputStream outputStream = new FileOutputStream( - downloadedSiftsFile); - byte[] buffer = new byte[BUFFER_SIZE]; - int bytesRead = -1; - while ((bytesRead = inputStream.read(buffer)) != -1) + downloadTo = new File( + SiftsSettings.getSiftDownloadDirectory() + siftFile); + File siftsDownloadDir = new File( + SiftsSettings.getSiftDownloadDirectory()); + if (!siftsDownloadDir.exists()) { - outputStream.write(buffer, 0, bytesRead); + siftsDownloadDir.mkdirs(); } - outputStream.close(); - inputStream.close(); - System.out.println(">>> File downloaded : " + downloadedSiftsFile); - } catch (IOException ex) + } + + // System.out.println(">> Download ftp url : " + siftsFileFTPURL); + // long now = System.currentTimeMillis(); + URL url = new URL(siftsFileFTPURL); + URLConnection conn = url.openConnection(); + InputStream inputStream = conn.getInputStream(); + FileOutputStream outputStream = new FileOutputStream( + downloadTo); + byte[] buffer = new byte[BUFFER_SIZE]; + int bytesRead = -1; + while ((bytesRead = inputStream.read(buffer)) != -1) { - ex.printStackTrace(); + outputStream.write(buffer, 0, bytesRead); } - return new File(downloadedSiftsFile); + outputStream.close(); + inputStream.close(); + // System.out.println(">>> File downloaded : " + downloadedSiftsFile + // + " took " + (System.currentTimeMillis() - now) + "ms"); + return downloadTo; } /** @@ -229,8 +353,8 @@ public class SiftsClient implements SiftsClientI */ public static boolean deleteSiftsFileByPDBId(String pdbId) { - File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase() - + ".xml.gz"); + File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() + + pdbId.toLowerCase() + ".xml.gz"); if (siftsFile.exists()) { return siftsFile.delete(); @@ -238,7 +362,6 @@ public class SiftsClient implements SiftsClientI return true; } - /** * Get a valid SIFTs DBRef for the given sequence current SIFTs entry * @@ -248,66 +371,47 @@ public class SiftsClient implements SiftsClientI * @throws Exception * if no valid source DBRefEntry was found for the given sequences */ - public DBRefEntryI getValidSourceDBRef(SequenceI seq) throws Exception + public DBRefEntryI getValidSourceDBRef(SequenceI seq) + throws SiftsException { - DBRefEntryI sourceDBRef = null; - sourceDBRef = seq.getSourceDBRef(); - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) + List dbRefs = seq.getPrimaryDBRefs(); + if (dbRefs == null || dbRefs.size() < 1) { - return sourceDBRef; + throw new SiftsException( + "Source DBRef could not be determined. DBRefs might not have been retrieved."); } - else + + for (DBRefEntry dbRef : dbRefs) { - DBRefEntry[] dbRefs = seq.getDBRefs(); - if (dbRefs == null || dbRefs.length < 1) + if (dbRef == null || dbRef.getAccessionId() == null + || dbRef.getSource() == null) { - final SequenceI[] seqs = new SequenceI[] { seq }; - new jalview.ws.DBRefFetcher(seqs, null, null, null, false) - .fetchDBRefs(true); - dbRefs = seq.getDBRefs(); + continue; } - - if (dbRefs == null || dbRefs.length < 1) + String canonicalSource = DBRefUtils + .getCanonicalName(dbRef.getSource()); + if (isValidDBRefEntry(dbRef) + && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT) + || canonicalSource.equalsIgnoreCase(DBRefSource.PDB))) { - throw new Exception("Could not get source DB Ref"); + return dbRef; } - - for (DBRefEntryI dbRef : dbRefs) - { - if (dbRef == null || dbRef.getAccessionId() == null - || dbRef.getSource() == null) - { - continue; - } - if (isFoundInSiftsEntry(dbRef.getAccessionId()) - && (dbRef.getSource().equalsIgnoreCase("uniprot") || dbRef - .getSource().equalsIgnoreCase("pdb"))) - { - return dbRef; - } - } - } - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) - { - return sourceDBRef; } - throw new Exception("Could not get source DB Ref"); + throw new SiftsException("Could not get source DB Ref"); } - /** - * Check that the DBRef Entry is properly populated and is available in the - * instantiated SIFTs Entry + * Check that the DBRef Entry is properly populated and is available in this + * SiftClient instance * * @param entry * - DBRefEntry to validate * @return true validation is successful otherwise false is returned. */ - private boolean isValidDBRefEntry(DBRefEntryI entry) + boolean isValidDBRefEntry(DBRefEntryI entry) { return entry != null && entry.getAccessionId() != null && isFoundInSiftsEntry(entry.getAccessionId()); - // & entry.getStartRes() > 0; } @Override @@ -324,268 +428,819 @@ public class SiftsClient implements SiftsClientI .getMapRegion(); for (MapRegion mapRegion : mapRegions) { - accessions.add(mapRegion.getDb().getDbAccessionId()); + accessions + .add(mapRegion.getDb().getDbAccessionId().toLowerCase()); } } } return accessions; } + @Override + public StructureMapping getSiftsStructureMapping(SequenceI seq, + String pdbFile, String chain) throws SiftsException + { + SequenceI aseq = seq; + while (seq.getDatasetSequence() != null) + { + seq = seq.getDatasetSequence(); + } + structId = (chain == null) ? pdbId : pdbId + "|" + chain; + System.out.println("Getting SIFTS mapping for " + structId + ": seq " + + seq.getName()); + + final StringBuilder mappingDetails = new StringBuilder(128); + PrintStream ps = new PrintStream(System.out) + { + @Override + public void print(String x) + { + mappingDetails.append(x); + } + + @Override + public void println() + { + mappingDetails.append(NEWLINE); + } + }; + HashMap mapping = getGreedyMapping(chain, seq, ps); + + String mappingOutput = mappingDetails.toString(); + StructureMapping siftsMapping = new StructureMapping(aseq, pdbFile, + pdbId, chain, mapping, mappingOutput, seqFromPdbMapping); + + return siftsMapping; + } @Override - public int[][] getGreedyMapping(String entityId, SequenceI seq, - java.io.PrintStream os) - throws Exception + public HashMap getGreedyMapping(String entityId, + SequenceI seq, java.io.PrintStream os) throws SiftsException { - System.out.println("Generating mappings for : " + entityId); + List omitNonObserved = new ArrayList<>(); + int nonObservedShiftIndex = 0,pdbeNonObserved=0; + // System.out.println("Generating mappings for : " + entityId); Entity entity = null; entity = getEntityById(entityId); - String seqStr = AlignSeq.extractGaps(jalview.util.Comparison.GapChars, - seq.getSequenceAsString()); - // StringBuilder mappedStrucSeq = new StringBuilder(seqStr.length()); - String[] mappedStrucSeq = new String[seqStr.length()]; - int mapping[][] = new int[seqStr.length()][2]; - DBRefEntryI sourceDBRef = seq.getSourceDBRef(); - if (sourceDBRef == null) - { - sourceDBRef = getValidSourceDBRef(seq); - // TODO if sourceDBRef is null at this point then throw an Exception + String originalSeq = AlignSeq.extractGaps( + jalview.util.Comparison.GapChars, seq.getSequenceAsString()); + HashMap mapping = new HashMap(); + DBRefEntryI sourceDBRef; + sourceDBRef = getValidSourceDBRef(seq); + // TODO ensure sequence start/end is in the same coordinate system and + // consistent with the choosen sourceDBRef - // TODO update sequence start/end with sourceDBRef start/end - // seq.setStart(sourceDBRef.getStartRes()); - // seq.setEnd(sourceDBRef.getEndRes()); + // set sequence coordinate system - default value is UniProt + if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB)) + { + seqCoordSys = CoordinateSys.PDB; } - String crossRefAccessionId = sourceDBRef.getAccessionId(); - int start = seq.getStart() - 1; - for (int residue[] : mapping) + HashSet dbRefAccessionIdsString = new HashSet(); + for (DBRefEntry dbref : seq.getDBRefs()) { - residue[1] = start++; + dbRefAccessionIdsString.add(dbref.getAccessionId().toLowerCase()); } - - HashMap resNumMap = new HashMap(); + dbRefAccessionIdsString.add(sourceDBRef.getAccessionId().toLowerCase()); + + curDBRefAccessionIdsString = dbRefAccessionIdsString; + curSourceDBRef = sourceDBRef.getAccessionId(); + + TreeMap resNumMap = new TreeMap(); List segments = entity.getSegment(); + SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap, + omitNonObserved, nonObservedShiftIndex,pdbeNonObserved); + processSegments(segments, shp); + try + { + populateAtomPositions(entityId, mapping); + } catch (Exception e) + { + e.printStackTrace(); + } + if (seqCoordSys == CoordinateSys.UNIPROT) + { + padWithGaps(resNumMap, omitNonObserved); + } + int seqStart = UNASSIGNED; + int seqEnd = UNASSIGNED; + int pdbStart = UNASSIGNED; + int pdbEnd = UNASSIGNED; + + if (mapping.isEmpty()) + { + throw new SiftsException("SIFTS mapping failed"); + } + // also construct a mapping object between the seq-coord sys and the PDB seq's coord sys + + Integer[] keys = mapping.keySet().toArray(new Integer[0]); + Arrays.sort(keys); + seqStart = keys[0]; + seqEnd = keys[keys.length - 1]; + List from=new ArrayList<>(),to=new ArrayList<>(); + int[]_cfrom=null,_cto=null; + String matchedSeq = originalSeq; + if (seqStart != UNASSIGNED) // fixme! seqStart can map to -1 for a pdb sequence that starts <-1 + { + for (int seqps:keys) + { + int pdbpos = mapping.get(seqps)[PDBE_POS]; + if (pdbpos == UNASSIGNED) + { + // not correct - pdbpos might be -1, but leave it for now + continue; + } + if (_cfrom==null || seqps!=_cfrom[1]+1) + { + _cfrom = new int[] { seqps,seqps}; + from.add(_cfrom); + _cto = null; // discontinuity + } else { + _cfrom[1]= seqps; + } + if (_cto==null || pdbpos!=1+_cto[1]) + { + _cto = new int[] { pdbpos,pdbpos}; + to.add(_cto); + } else { + _cto[1] = pdbpos; + } + } + _cfrom = new int[from.size() * 2]; + _cto = new int[to.size() * 2]; + int p = 0; + for (int[] range : from) + { + _cfrom[p++] = range[0]; + _cfrom[p++] = range[1]; + } + ; + p = 0; + for (int[] range : to) + { + _cto[p++] = range[0]; + _cto[p++] = range[1]; + } + ; + + seqFromPdbMapping = new jalview.datamodel.Mapping(null, _cto, _cfrom, + 1, + 1); + pdbStart = mapping.get(seqStart)[PDB_RES_POS]; + pdbEnd = mapping.get(seqEnd)[PDB_RES_POS]; + int orignalSeqStart = seq.getStart(); + if (orignalSeqStart >= 1) + { + int subSeqStart = (seqStart >= orignalSeqStart) + ? seqStart - orignalSeqStart + : 0; + int subSeqEnd = seqEnd - (orignalSeqStart - 1); + subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length() + : subSeqEnd; + matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd); + } + else + { + matchedSeq = originalSeq.substring(1, originalSeq.length()); + } + } + + StringBuilder targetStrucSeqs = new StringBuilder(); + for (String res : resNumMap.values()) + { + targetStrucSeqs.append(res); + } + + if (os != null) + { + MappingOutputPojo mop = new MappingOutputPojo(); + mop.setSeqStart(seqStart); + mop.setSeqEnd(seqEnd); + mop.setSeqName(seq.getName()); + mop.setSeqResidue(matchedSeq); + + mop.setStrStart(pdbStart); + mop.setStrEnd(pdbEnd); + mop.setStrName(structId); + mop.setStrResidue(targetStrucSeqs.toString()); + + mop.setType("pep"); + os.print(getMappingOutput(mop).toString()); + os.println(); + } + return mapping; + } + + void processSegments(List segments, SegmentHelperPojo shp) + { + SequenceI seq = shp.getSeq(); + HashMap mapping = shp.getMapping(); + TreeMap resNumMap = shp.getResNumMap(); + List omitNonObserved = shp.getOmitNonObserved(); + int nonObservedShiftIndex = shp.getNonObservedShiftIndex(); + int pdbeNonObservedCount = shp.getPdbeNonObserved(); + int firstPDBResNum = UNASSIGNED; for (Segment segment : segments) { - System.out.println("Mappging segments : " + segment.getSegId() + "\\" - + segment.getStart() + "-" + segment.getEnd()); + // System.out.println("Mapping segments : " + segment.getSegId() + "\\"s + // + segStartEnd); List residues = segment.getListResidue().getResidue(); for (Residue residue : residues) { - int refDbResNum = -1; + boolean isObserved = isResidueObserved(residue); + int pdbeIndex = getLeadingIntegerValue(residue.getDbResNum(), + UNASSIGNED); + int currSeqIndex = UNASSIGNED; List cRefDbs = residue.getCrossRefDb(); + CrossRefDb pdbRefDb = null; for (CrossRefDb cRefDb : cRefDbs) { - if (cRefDb.getDbAccessionId().equalsIgnoreCase( - crossRefAccessionId)) + if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB)) + { + pdbRefDb = cRefDb; + if (firstPDBResNum == UNASSIGNED) + { + firstPDBResNum = getLeadingIntegerValue(cRefDb.getDbResNum(), + UNASSIGNED); + } + else + { + if (isObserved) + { + // after we find the first observed residue we just increment + firstPDBResNum++; + } + } + } + if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName()) + && isAccessionMatched(cRefDb.getDbAccessionId())) + { + currSeqIndex = getLeadingIntegerValue(cRefDb.getDbResNum(), + UNASSIGNED); + if (pdbRefDb != null) + { + break;// exit loop if pdb and uniprot are already found + } + } + } + if (!isObserved) + { + ++pdbeNonObservedCount; + } + if (seqCoordSys == seqCoordSys.PDB) // FIXME: is seqCoordSys ever PDBe + // ??? + { + // if the sequence has a primary reference to the PDB, then we are + // dealing with a sequence extracted directly from the PDB. In that + // case, numbering is PDBe - non-observed residues + currSeqIndex = seq.getStart() - 1 + pdbeIndex; + } + if (!isObserved) + { + if (seqCoordSys != CoordinateSys.UNIPROT) // FIXME: PDB or PDBe only + // here { - refDbResNum = Integer.valueOf(cRefDb.getDbResNum()); + // mapping to PDB or PDBe so we need to bookkeep for the + // non-observed + // SEQRES positions + omitNonObserved.add(currSeqIndex); + ++nonObservedShiftIndex; } } - if (refDbResNum == -1) + if (currSeqIndex == UNASSIGNED) { + // change in logic - unobserved residues with no currSeqIndex + // corresponding are still counted in both nonObservedShiftIndex and + // pdbeIndex... continue; } - for (int[] x : mapping) + // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) // + // true + // numbering + // is + // not + // up + // to + // seq.getEnd() { - if (x[1] == refDbResNum) + + int resNum = (pdbRefDb == null) + ? getLeadingIntegerValue(residue.getDbResNum(), + UNASSIGNED) + : getLeadingIntegerValue(pdbRefDb.getDbResNum(), + UNASSIGNED); + + if (isObserved) { - int resNum = Integer.valueOf(residue.getDbResNum()); - x[0] = resNum; - String value = "x"; - resNumMap.put(resNum, value); + char resCharCode = ResidueProperties + .getSingleCharacterCode(ResidueProperties + .getCanonicalAminoAcid(residue.getDbResName())); + resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); + + int[] mappingcols = new int[] { Integer.valueOf(resNum), + UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED }; + + mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols); } } } } - - //Generate visual mapping output - // StringBuilder strucSeq = new StringBuilder(); - // for(int[] x : mapping){ - // if(mapping[0] == 0){ - // strucSeq.append(b) - // } - // } - mappedStrucSeq[1] = "x"; + } + + /** + * Get the leading integer part of a string that begins with an integer. + * + * @param input + * - the string input to process + * @param failValue + * - value returned if unsuccessful + * @return + */ + static int getLeadingIntegerValue(String input, int failValue) + { + if (input == null) + { + return failValue; + } + String[] parts = input.split("(?=\\D)(?<=\\d)"); + if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+")) + { + return Integer.valueOf(parts[0]); + } + return failValue; + } + + /** + * + * @param chainId + * Target chain to populate mapping of its atom positions. + * @param mapping + * Two dimension array of residue index versus atom position + * @throws IllegalArgumentException + * Thrown if chainId or mapping is null + * @throws SiftsException + */ + void populateAtomPositions(String chainId, Map mapping) + throws IllegalArgumentException, SiftsException + { try { - System.out.println(">>>> seq: " + seqStr + "\nlength " - + seqStr.length()); - System.out.println(">>>> pdb: " + mappedStrucSeq.toString() - + "\nlength " + mappedStrucSeq.toString().length()); + PDBChain chain = pdb.findChain(chainId); - String printedMapping = getMappingOutput(mappedStrucSeq.toString(), - seqStr, "seqAccession", "strucAccession", "pep", 3) - .toString(); - if (os != null) + if (chain == null || mapping == null) + { + throw new IllegalArgumentException( + "Chain id or mapping must not be null."); + } + for (int[] map : mapping.values()) { - os.print(printedMapping); + if (map[PDB_RES_POS] != UNASSIGNED) + { + map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); + } } - System.out.println(); - } catch (Exception ex) + } catch (NullPointerException e) + { + throw new SiftsException(e.getMessage()); + } catch (Exception e) { - ex.printStackTrace(); + throw new SiftsException(e.getMessage()); } - return mapping; } - @Override - public boolean isFoundInSiftsEntry(String accessionId) + /** + * + * @param residueIndex + * The residue index used for the search + * @param atoms + * A collection of Atom to search + * @return atom position for the given residue index + */ + int getAtomIndex(int residueIndex, Collection atoms) { - return accessionId != null - && getAllMappingAccession().contains(accessionId); + if (atoms == null) + { + throw new IllegalArgumentException( + "atoms collection must not be null!"); + } + for (Atom atom : atoms) + { + if (atom.resNumber == residueIndex) + { + return atom.atomIndex; + } + } + return UNASSIGNED; } - @Override - public StructureMapping getSiftsStructureMapping(SequenceI seq, - String pdbFile, String chain) + /** + * Checks if the residue instance is marked 'Not_observed' or not + * + * @param residue + * @return + */ + private boolean isResidueObserved(Residue residue) { - System.out.println("Getting mapping for: " + pdbId + "|" + chain - + " : seq- " + seq.getName()); - - final StringBuilder mappingDetails = new StringBuilder(128); - PrintStream ps = new PrintStream(System.out) + Set annotations = getResidueAnnotaitons(residue, + ResidueDetailType.ANNOTATION); + if (annotations == null || annotations.isEmpty()) { - @Override - public void print(String x) + return true; + } + for (String annotation : annotations) + { + if (annotation.equalsIgnoreCase(NOT_OBSERVED)) { - mappingDetails.append(x); + return false; } + } + return true; + } - @Override - public void println() + /** + * Get annotation String for a given residue and annotation type + * + * @param residue + * @param type + * @return + */ + private Set getResidueAnnotaitons(Residue residue, + ResidueDetailType type) + { + HashSet foundAnnotations = new HashSet(); + List resDetails = residue.getResidueDetail(); + for (ResidueDetail resDetail : resDetails) + { + if (resDetail.getProperty().equalsIgnoreCase(type.getCode())) { - mappingDetails.append(NEWLINE); + foundAnnotations.add(resDetail.getContent()); } - }; - int[][] mapping = null; - try + } + return foundAnnotations; + } + + @Override + public boolean isAccessionMatched(String accession) + { + boolean isStrictMatch = true; + return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession) + : curDBRefAccessionIdsString.contains(accession.toLowerCase()); + } + + private boolean isFoundInSiftsEntry(String accessionId) + { + Set siftsDBRefs = getAllMappingAccession(); + return accessionId != null + && siftsDBRefs.contains(accessionId.toLowerCase()); + } + + /** + * Pad omitted residue positions in PDB sequence with gaps + * + * @param resNumMap + */ + void padWithGaps(Map resNumMap, + List omitNonObserved) + { + if (resNumMap == null || resNumMap.isEmpty()) { - mapping = getGreedyMapping(chain, seq, ps); - } catch (Exception e) + return; + } + Integer[] keys = resNumMap.keySet().toArray(new Integer[0]); + // Arrays.sort(keys); + int firstIndex = keys[0]; + int lastIndex = keys[keys.length - 1]; + // System.out.println("Min value " + firstIndex); + // System.out.println("Max value " + lastIndex); + for (int x = firstIndex; x <= lastIndex; x++) { - e.printStackTrace(); + if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x)) + { + resNumMap.put(x, "-"); + } } - // String mappingOutput = mappingDetails.toString(); - String mappingOutput = null; - return new StructureMapping(seq, pdbFile, pdbId, chain, mapping, - mappingOutput); } @Override - public Entity getEntityById(String id) throws Exception + public Entity getEntityById(String id) throws SiftsException { + // Determines an entity to process by performing a heuristic matching of all + // Entities with the given chainId and choosing the best matching Entity + Entity entity = getEntityByMostOptimalMatchedId(id); + if (entity != null) + { + return entity; + } + throw new SiftsException("Entity " + id + " not found"); + } + + /** + * This method was added because EntityId is NOT always equal to ChainId. + * Hence, it provides the logic to greedily detect the "true" Entity for a + * given chainId where discrepancies exist. + * + * @param chainId + * @return + */ + public Entity getEntityByMostOptimalMatchedId(String chainId) + { + // System.out.println("---> advanced greedy entityId matching block + // entered.."); List entities = siftsEntry.getEntity(); + SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()]; + int count = 0; for (Entity entity : entities) { - if (!entity.getEntityId().equalsIgnoreCase(id)) + sPojo[count] = new SiftsEntitySortPojo(); + sPojo[count].entityId = entity.getEntityId(); + + List segments = entity.getSegment(); + for (Segment segment : segments) { - continue; + List residues = segment.getListResidue().getResidue(); + for (Residue residue : residues) + { + List cRefDbs = residue.getCrossRefDb(); + for (CrossRefDb cRefDb : cRefDbs) + { + if (!cRefDb.getDbSource().equalsIgnoreCase("PDB")) + { + continue; + } + ++sPojo[count].resCount; + if (cRefDb.getDbChainId().equalsIgnoreCase(chainId)) + { + ++sPojo[count].chainIdFreq; + } + } + } } - return entity; + sPojo[count].pid = (100 * sPojo[count].chainIdFreq) + / sPojo[count].resCount; + ++count; } - throw new Exception("Entity " + id + " not found"); + Arrays.sort(sPojo, Collections.reverseOrder()); + // System.out.println("highest matched entity : " + sPojo[0].entityId); + // System.out.println("highest matched pid : " + sPojo[0].pid); + + if (sPojo[0].entityId != null) + { + if (sPojo[0].pid < 1) + { + return null; + } + for (Entity entity : entities) + { + if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId)) + { + continue; + } + return entity; + } + } + return null; } - @Override - public String[] getEntryDBs() + private class SiftsEntitySortPojo + implements Comparable { - System.out.println("\nListing DB entries..."); - List dbs = siftsEntry.getListDB().getDb(); - for (Db db : dbs) + public String entityId; + + public int chainIdFreq; + + public int pid; + + public int resCount; + + @Override + public int compareTo(SiftsEntitySortPojo o) { - System.out.println(db.getDbSource() + " | " + db.getDbCoordSys()); + return this.pid - o.pid; } - return null; } - @Override - public void getEntryDetails() + private class SegmentHelperPojo { - List eds = siftsEntry.getEntryDetail(); - for (EntryDetail ed : eds) + private SequenceI seq; + + private HashMap mapping; + + private TreeMap resNumMap; + + private List omitNonObserved; + + private int nonObservedShiftIndex; + + /** + * count of number of 'not observed' positions in the PDB record's SEQRES + * (total number of residues with coordinates == length(SEQRES) - + * pdbeNonObserved + */ + private int pdbeNonObserved; + + public SegmentHelperPojo(SequenceI seq, HashMap mapping, + TreeMap resNumMap, + List omitNonObserved, int nonObservedShiftIndex, + int pdbeNonObserved) { - System.out.println("Entry Details: " + ed.getContent() + " " - + ed.getDbSource() + " " + ed.getProperty() + " " - + ed.toString()); + setSeq(seq); + setMapping(mapping); + setResNumMap(resNumMap); + setOmitNonObserved(omitNonObserved); + setNonObservedShiftIndex(nonObservedShiftIndex); + setPdbeNonObserved(pdbeNonObserved); + } + + public void setPdbeNonObserved(int pdbeNonObserved2) + { + this.pdbeNonObserved = pdbeNonObserved2; + } + + public int getPdbeNonObserved() + { + return pdbeNonObserved; + } + public SequenceI getSeq() + { + return seq; + } + + public void setSeq(SequenceI seq) + { + this.seq = seq; + } + + public HashMap getMapping() + { + return mapping; + } + + public void setMapping(HashMap mapping) + { + this.mapping = mapping; + } + + public TreeMap getResNumMap() + { + return resNumMap; + } + + public void setResNumMap(TreeMap resNumMap) + { + this.resNumMap = resNumMap; + } + + public List getOmitNonObserved() + { + return omitNonObserved; + } + + public void setOmitNonObserved(List omitNonObserved) + { + this.omitNonObserved = omitNonObserved; + } + + public int getNonObservedShiftIndex() + { + return nonObservedShiftIndex; + } + + public void setNonObservedShiftIndex(int nonObservedShiftIndex) + { + this.nonObservedShiftIndex = nonObservedShiftIndex; + } + } @Override - public StringBuffer getMappingOutput(String astr1, String astr2, String s1id, - String s2id, String type, int nochunks) + public StringBuilder getMappingOutput(MappingOutputPojo mp) + throws SiftsException { - int maxid = s1id.length(); + String seqRes = mp.getSeqResidue(); + String seqName = mp.getSeqName(); + int sStart = mp.getSeqStart(); + int sEnd = mp.getSeqEnd(); + + String strRes = mp.getStrResidue(); + String strName = mp.getStrName(); + int pdbStart = mp.getStrStart(); + int pdbEnd = mp.getStrEnd(); + + String type = mp.getType(); + + int maxid = (seqName.length() >= strName.length()) ? seqName.length() + : strName.length(); int len = 72 - maxid - 1; - StringBuffer output = new StringBuffer(); + + int nochunks = ((seqRes.length()) / len) + + ((seqRes.length()) % len > 0 ? 1 : 0); // output mappings - float pid = 0; + StringBuilder output = new StringBuilder(512); + output.append(NEWLINE); + output.append("Sequence \u27f7 Structure mapping details") + .append(NEWLINE); + output.append("Method: SIFTS"); + output.append(NEWLINE).append(NEWLINE); + + output.append(new Format("%" + maxid + "s").form(seqName)); + output.append(" : "); + output.append(String.valueOf(sStart)); + output.append(" - "); + output.append(String.valueOf(sEnd)); + output.append(" Maps to "); + output.append(NEWLINE); + output.append(new Format("%" + maxid + "s").form(structId)); + output.append(" : "); + output.append(String.valueOf(pdbStart)); + output.append(" - "); + output.append(String.valueOf(pdbEnd)); + output.append(NEWLINE).append(NEWLINE); + + ScoreMatrix pam250 = ScoreModels.getInstance().getPam250(); + int matchedSeqCount = 0; for (int j = 0; j < nochunks; j++) { // Print the first aligned sequence - output.append(new Format("%" + (maxid) + "s").form(s1id)).append(" "); + output.append(new Format("%" + (maxid) + "s").form(seqName)) + .append(" "); for (int i = 0; i < len; i++) { - if ((i + (j * len)) < astr1.length()) + if ((i + (j * len)) < seqRes.length()) { - output.append(astr1.charAt(i + (j * len))); + output.append(seqRes.charAt(i + (j * len))); } } output.append(NEWLINE); output.append(new Format("%" + (maxid) + "s").form(" ")).append(" "); - // Print out the matching chars + /* + * Print out the match symbols: + * | for exact match (ignoring case) + * . if PAM250 score is positive + * else a space + */ for (int i = 0; i < len; i++) { - if ((i + (j * len)) < astr1.length()) + try { - if (astr1.charAt(i + (j * len)) == astr2.charAt(i + (j * len)) - && !jalview.util.Comparison.isGap(astr1.charAt(i - + (j * len)))) + if ((i + (j * len)) < seqRes.length()) { - pid++; - output.append("|"); - } - else if (type.equals("pep")) - { - if (ResidueProperties.getPAM250(astr1.charAt(i + (j * len)), - astr2.charAt(i + (j * len))) > 0) + char c1 = seqRes.charAt(i + (j * len)); + char c2 = strRes.charAt(i + (j * len)); + boolean sameChar = Comparison.isSameResidue(c1, c2, false); + if (sameChar && !Comparison.isGap(c1)) + { + matchedSeqCount++; + output.append("|"); + } + else if (type.equals("pep")) { - output.append("."); + if (pam250.getPairwiseScore(c1, c2) > 0) + { + output.append("."); + } + else + { + output.append(" "); + } } else { output.append(" "); } } - else - { - output.append(" "); - } + } catch (IndexOutOfBoundsException e) + { + continue; } } // Now print the second aligned sequence output = output.append(NEWLINE); - output = output.append(new Format("%" + (maxid) + "s").form(s2id)) + output = output.append(new Format("%" + (maxid) + "s").form(strName)) .append(" "); for (int i = 0; i < len; i++) { - if ((i + (j * len)) < astr2.length()) + if ((i + (j * len)) < strRes.length()) { - output.append(astr2.charAt(i + (j * len))); + output.append(strRes.charAt(i + (j * len))); } } output.append(NEWLINE).append(NEWLINE); } - pid = pid / (astr1.length()) * 100; - System.out.println(output); - System.out.println(pid); - // TODO return output & pid + float pid = (float) matchedSeqCount / seqRes.length() * 100; + if (pid < SiftsSettings.getFailSafePIDThreshold()) + { + throw new SiftsException(">>> Low PID detected for SIFTs mapping..."); + } + output.append("Length of alignment = " + seqRes.length()) + .append(NEWLINE); + output.append(new Format("Percentage ID = %2.2f").form(pid)); return output; } - + @Override public int getEntityCount() { @@ -605,12 +1260,6 @@ public class SiftsClient implements SiftsClientI } @Override - public String getDbEvidence() - { - return siftsEntry.getDbEvidence(); - } - - @Override public String getDbSource() { return siftsEntry.getDbSource(); @@ -621,4 +1270,10 @@ public class SiftsClient implements SiftsClientI { return siftsEntry.getDbVersion(); } + + public static void setMockSiftsFile(File file) + { + mockSiftsFile = file; + } + }