X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=0c707e5c18a9951715d83a84b55925244feea0f6;hb=57738a1f3c19b1c3a00bd3ac5108f8cd0af32f99;hp=13e52cdcf92e9eb58f7e847ce5c3daac79eb9a7d;hpb=37bc56d23032f26c0da11b0532902d1f629a2080;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 13e52cd..0c707e5 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -20,23 +20,7 @@ */ package jalview.ws.sifts; -import jalview.analysis.AlignSeq; -import jalview.api.DBRefEntryI; -import jalview.api.SiftsClientI; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; -import jalview.datamodel.SequenceI; -import jalview.io.StructureFile; -import jalview.schemes.ResidueProperties; -import jalview.structure.StructureMapping; -import jalview.util.Format; -import jalview.xml.binding.sifts.Entry; -import jalview.xml.binding.sifts.Entry.Entity; -import jalview.xml.binding.sifts.Entry.Entity.Segment; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; +import java.util.Locale; import java.io.File; import java.io.FileInputStream; @@ -63,15 +47,45 @@ import java.util.TreeMap; import java.util.zip.GZIPInputStream; import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBElement; import javax.xml.bind.Unmarshaller; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamReader; -import MCview.Atom; -import MCview.PDBChain; +import jalview.analysis.AlignSeq; +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.api.DBRefEntryI; +import jalview.api.SiftsClientI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceI; +import jalview.io.BackupFiles; +import jalview.io.StructureFile; +import jalview.schemes.ResidueProperties; +import jalview.structure.StructureMapping; +import jalview.util.Comparison; +import jalview.util.DBRefUtils; +import jalview.util.Format; +import jalview.util.Platform; +import jalview.xml.binding.sifts.Entry; +import jalview.xml.binding.sifts.Entry.Entity; +import jalview.xml.binding.sifts.Entry.Entity.Segment; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; +import mc_view.Atom; +import mc_view.PDBChain; public class SiftsClient implements SiftsClientI { + /* + * for use in mocking out file fetch for tests only + * - reset to null after testing! + */ + private static File mockSiftsFile; + private Entry siftsEntry; private StructureFile pdb; @@ -80,18 +94,26 @@ public class SiftsClient implements SiftsClientI private String structId; - // private String segStartEnd; - private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT; + /** + * PDB sequence position to sequence coordinate mapping as derived from SIFTS + * record for the identified SeqCoordSys Used for lift-over from sequence + * derived from PDB (with first extracted PDBRESNUM as 'start' to the sequence + * being annotated with PDB data + */ + private jalview.datamodel.Mapping seqFromPdbMapping; + private static final int BUFFER_SIZE = 4096; - public static final int UNASSIGNED = -1; + public static final int UNASSIGNED = Integer.MIN_VALUE; private static final int PDB_RES_POS = 0; private static final int PDB_ATOM_POS = 1; + private static final int PDBE_POS = 2; + private static final String NOT_OBSERVED = "Not_Observed"; private static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; @@ -102,9 +124,10 @@ public class SiftsClient implements SiftsClientI private HashSet curDBRefAccessionIdsString; - public enum CoordinateSys + private enum CoordinateSys { UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe"); + private String name; private CoordinateSys(String name) @@ -118,10 +141,11 @@ public class SiftsClient implements SiftsClientI } }; - public enum ResidueDetailType + private enum ResidueDetailType { - NAME_SEC_STRUCTURE("nameSecondaryStructure"), CODE_SEC_STRUCTURE( - "codeSecondaryStructure"), ANNOTATION("Annotation"); + NAME_SEC_STRUCTURE("nameSecondaryStructure"), + CODE_SEC_STRUCTURE("codeSecondaryStructure"), ANNOTATION("Annotation"); + private String code; private ResidueDetailType(String code) @@ -150,7 +174,6 @@ public class SiftsClient implements SiftsClientI siftsEntry = parseSIFTs(siftsFile); } - /** * Parse the given SIFTs File and return a JAXB POJO of parsed data * @@ -170,7 +193,8 @@ public class SiftsClient implements SiftsClientI XMLStreamReader streamReader = XMLInputFactory.newInstance() .createXMLStreamReader(gzis); Unmarshaller um = jc.createUnmarshaller(); - return (Entry) um.unmarshal(streamReader); + JAXBElement jbe = um.unmarshal(streamReader, Entry.class); + return jbe.getValue(); } catch (Exception e) { e.printStackTrace(); @@ -188,8 +212,16 @@ public class SiftsClient implements SiftsClientI */ public static File getSiftsFile(String pdbId) throws SiftsException { + /* + * return mocked file if it has been set + */ + if (mockSiftsFile != null) + { + return mockSiftsFile; + } + String siftsFileName = SiftsSettings.getSiftDownloadDirectory() - + pdbId.toLowerCase() + ".xml.gz"; + + pdbId.toLowerCase(Locale.ROOT) + ".xml.gz"; File siftsFile = new File(siftsFileName); if (siftsFile.exists()) { @@ -200,23 +232,27 @@ public class SiftsClient implements SiftsClientI SiftsSettings.getCacheThresholdInDays())) { File oldSiftsFile = new File(siftsFileName + "_old"); - siftsFile.renameTo(oldSiftsFile); + BackupFiles.moveFileToFile(siftsFile, oldSiftsFile); try { - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + siftsFile = downloadSiftsFile(pdbId.toLowerCase(Locale.ROOT)); oldSiftsFile.delete(); return siftsFile; } catch (IOException e) { e.printStackTrace(); - oldSiftsFile.renameTo(siftsFile); + BackupFiles.moveFileToFile(oldSiftsFile, siftsFile); return new File(siftsFileName); } } + else + { + return siftsFile; + } } try { - siftsFile = downloadSiftsFile(pdbId.toLowerCase()); + siftsFile = downloadSiftsFile(pdbId.toLowerCase(Locale.ROOT)); } catch (IOException e) { throw new SiftsException(e.getMessage()); @@ -242,8 +278,9 @@ public class SiftsClient implements SiftsClientI try { attr = Files.readAttributes(filePath, BasicFileAttributes.class); - diffInDays = (int) ((new Date().getTime() - attr.lastModifiedTime() - .toMillis()) / (1000 * 60 * 60 * 24)); + diffInDays = (int) ((new Date().getTime() + - attr.lastModifiedTime().toMillis()) + / (1000 * 60 * 60 * 24)); // System.out.println("Diff in days : " + diffInDays); } catch (IOException e) { @@ -260,8 +297,8 @@ public class SiftsClient implements SiftsClientI * @throws SiftsException * @throws IOException */ - public static File downloadSiftsFile(String pdbId) throws SiftsException, - IOException + public static File downloadSiftsFile(String pdbId) + throws SiftsException, IOException { if (pdbId.contains(".cif")) { @@ -269,30 +306,46 @@ public class SiftsClient implements SiftsClientI } String siftFile = pdbId + ".xml.gz"; String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; - String downloadedSiftsFile = SiftsSettings.getSiftDownloadDirectory() - + siftFile; - File siftsDownloadDir = new File( - SiftsSettings.getSiftDownloadDirectory()); - if (!siftsDownloadDir.exists()) + + /* + * Download the file from URL to either + * Java: directory of cached downloaded SIFTS files + * Javascript: temporary 'file' (in-memory cache) + */ + File downloadTo = null; + if (Platform.isJS()) { - siftsDownloadDir.mkdirs(); + downloadTo = File.createTempFile(siftFile, ".xml.gz"); } - // System.out.println(">> Download ftp url : " + siftsFileFTPURL); - URL url = new URL(siftsFileFTPURL); - URLConnection conn = url.openConnection(); - InputStream inputStream = conn.getInputStream(); - FileOutputStream outputStream = new FileOutputStream( - downloadedSiftsFile); - byte[] buffer = new byte[BUFFER_SIZE]; - int bytesRead = -1; - while ((bytesRead = inputStream.read(buffer)) != -1) + else + { + downloadTo = new File( + SiftsSettings.getSiftDownloadDirectory() + siftFile); + File siftsDownloadDir = new File( + SiftsSettings.getSiftDownloadDirectory()); + if (!siftsDownloadDir.exists()) { - outputStream.write(buffer, 0, bytesRead); + siftsDownloadDir.mkdirs(); } - outputStream.close(); - inputStream.close(); - // System.out.println(">>> File downloaded : " + downloadedSiftsFile); - return new File(downloadedSiftsFile); + } + + // System.out.println(">> Download ftp url : " + siftsFileFTPURL); + // long now = System.currentTimeMillis(); + URL url = new URL(siftsFileFTPURL); + URLConnection conn = url.openConnection(); + InputStream inputStream = conn.getInputStream(); + FileOutputStream outputStream = new FileOutputStream(downloadTo); + byte[] buffer = new byte[BUFFER_SIZE]; + int bytesRead = -1; + while ((bytesRead = inputStream.read(buffer)) != -1) + { + outputStream.write(buffer, 0, bytesRead); + } + outputStream.close(); + inputStream.close(); + // System.out.println(">>> File downloaded : " + downloadedSiftsFile + // + " took " + (System.currentTimeMillis() - now) + "ms"); + return downloadTo; } /** @@ -305,7 +358,7 @@ public class SiftsClient implements SiftsClientI public static boolean deleteSiftsFileByPDBId(String pdbId) { File siftsFile = new File(SiftsSettings.getSiftDownloadDirectory() - + pdbId.toLowerCase() + ".xml.gz"); + + pdbId.toLowerCase(Locale.ROOT) + ".xml.gz"); if (siftsFile.exists()) { return siftsFile.delete(); @@ -325,41 +378,29 @@ public class SiftsClient implements SiftsClientI public DBRefEntryI getValidSourceDBRef(SequenceI seq) throws SiftsException { - DBRefEntryI sourceDBRef = null; - sourceDBRef = seq.getSourceDBRef(); - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) + List dbRefs = seq.getPrimaryDBRefs(); + if (dbRefs == null || dbRefs.size() < 1) { - return sourceDBRef; + throw new SiftsException( + "Source DBRef could not be determined. DBRefs might not have been retrieved."); } - else + + for (DBRefEntry dbRef : dbRefs) { - DBRefEntry[] dbRefs = seq.getDBRefs(); - if (dbRefs == null || dbRefs.length < 1) + if (dbRef == null || dbRef.getAccessionId() == null + || dbRef.getSource() == null) { - throw new SiftsException( - "Source DBRef could not be determined. DBRefs might not have been retrieved."); + continue; } - - for (DBRefEntryI dbRef : dbRefs) + String canonicalSource = DBRefUtils + .getCanonicalName(dbRef.getSource()); + if (isValidDBRefEntry(dbRef) + && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT) + || canonicalSource.equalsIgnoreCase(DBRefSource.PDB))) { - if (dbRef == null || dbRef.getAccessionId() == null - || dbRef.getSource() == null) - { - continue; - } - if (isFoundInSiftsEntry(dbRef.getAccessionId()) - && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef - .getSource().equalsIgnoreCase(DBRefSource.PDB))) - { - seq.setSourceDBRef(dbRef); - return dbRef; - } + return dbRef; } } - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) - { - return sourceDBRef; - } throw new SiftsException("Could not get source DB Ref"); } @@ -391,8 +432,8 @@ public class SiftsClient implements SiftsClientI .getMapRegion(); for (MapRegion mapRegion : mapRegions) { - accessions - .add(mapRegion.getDb().getDbAccessionId().toLowerCase()); + accessions.add(mapRegion.getDb().getDbAccessionId() + .toLowerCase(Locale.ROOT)); } } } @@ -403,9 +444,14 @@ public class SiftsClient implements SiftsClientI public StructureMapping getSiftsStructureMapping(SequenceI seq, String pdbFile, String chain) throws SiftsException { + SequenceI aseq = seq; + while (seq.getDatasetSequence() != null) + { + seq = seq.getDatasetSequence(); + } structId = (chain == null) ? pdbId : pdbId + "|" + chain; - System.out.println("Getting mapping for: " + pdbId + "|" + chain - + " : seq- " + seq.getName()); + System.out.println("Getting SIFTS mapping for " + structId + ": seq " + + seq.getName()); final StringBuilder mappingDetails = new StringBuilder(128); PrintStream ps = new PrintStream(System.out) @@ -425,8 +471,9 @@ public class SiftsClient implements SiftsClientI HashMap mapping = getGreedyMapping(chain, seq, ps); String mappingOutput = mappingDetails.toString(); - StructureMapping siftsMapping = new StructureMapping(seq, pdbFile, - pdbId, chain, mapping, mappingOutput); + StructureMapping siftsMapping = new StructureMapping(aseq, pdbFile, + pdbId, chain, mapping, mappingOutput, seqFromPdbMapping); + return siftsMapping; } @@ -434,15 +481,15 @@ public class SiftsClient implements SiftsClientI public HashMap getGreedyMapping(String entityId, SequenceI seq, java.io.PrintStream os) throws SiftsException { - List omitNonObserved = new ArrayList(); - int nonObservedShiftIndex = 0; + List omitNonObserved = new ArrayList<>(); + int nonObservedShiftIndex = 0, pdbeNonObserved = 0; // System.out.println("Generating mappings for : " + entityId); Entity entity = null; entity = getEntityById(entityId); String originalSeq = AlignSeq.extractGaps( jalview.util.Comparison.GapChars, seq.getSequenceAsString()); HashMap mapping = new HashMap(); - DBRefEntryI sourceDBRef = seq.getSourceDBRef(); + DBRefEntryI sourceDBRef; sourceDBRef = getValidSourceDBRef(seq); // TODO ensure sequence start/end is in the same coordinate system and // consistent with the choosen sourceDBRef @@ -456,92 +503,20 @@ public class SiftsClient implements SiftsClientI HashSet dbRefAccessionIdsString = new HashSet(); for (DBRefEntry dbref : seq.getDBRefs()) { - dbRefAccessionIdsString.add(dbref.getAccessionId().toLowerCase()); + dbRefAccessionIdsString + .add(dbref.getAccessionId().toLowerCase(Locale.ROOT)); } - dbRefAccessionIdsString.add(sourceDBRef.getAccessionId().toLowerCase()); + dbRefAccessionIdsString + .add(sourceDBRef.getAccessionId().toLowerCase(Locale.ROOT)); curDBRefAccessionIdsString = dbRefAccessionIdsString; curSourceDBRef = sourceDBRef.getAccessionId(); TreeMap resNumMap = new TreeMap(); List segments = entity.getSegment(); - for (Segment segment : segments) - { - // segStartEnd = segment.getStart() + " - " + segment.getEnd(); - // System.out.println("Mapping segments : " + segment.getSegId() + "\\" - // + segStartEnd); - List residues = segment.getListResidue().getResidue(); - for (Residue residue : residues) - { - int currSeqIndex = UNASSIGNED; - List cRefDbs = residue.getCrossRefDb(); - CrossRefDb pdbRefDb = null; - for (CrossRefDb cRefDb : cRefDbs) - { - if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB)) - { - pdbRefDb = cRefDb; - } - if (cRefDb.getDbCoordSys() - .equalsIgnoreCase(seqCoordSys.getName()) - && isAccessionMatched(cRefDb.getDbAccessionId())) - { - String resNumIndexString = cRefDb.getDbResNum() - .equalsIgnoreCase("None") ? String.valueOf(UNASSIGNED) - : cRefDb.getDbResNum(); - try - { - currSeqIndex = Integer.valueOf(resNumIndexString); - } catch (NumberFormatException nfe) - { - currSeqIndex = Integer.valueOf(resNumIndexString - .split("[a-zA-Z]")[0]); - continue; - } - if (pdbRefDb != null) - { - break;// exit loop if pdb and uniprot are already found - } - } - } - if (currSeqIndex == UNASSIGNED) - { - continue; - } - if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd()) - { - int resNum; - try - { - resNum = (pdbRefDb == null) ? Integer.valueOf(residue - .getDbResNum()) : Integer.valueOf(pdbRefDb - .getDbResNum()); - } catch (NumberFormatException nfe) - { - resNum = (pdbRefDb == null) ? Integer.valueOf(residue - .getDbResNum()) : Integer.valueOf(pdbRefDb - .getDbResNum().split("[a-zA-Z]")[0]); - continue; - } - - if (isResidueObserved(residue) - || seqCoordSys == CoordinateSys.UNIPROT) - { - char resCharCode = ResidueProperties - .getSingleCharacterCode(ResidueProperties - .getCanonicalAminoAcid(residue.getDbResName())); - resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); - } - else - { - omitNonObserved.add(currSeqIndex); - ++nonObservedShiftIndex; - } - mapping.put(currSeqIndex - nonObservedShiftIndex, new int[] { - Integer.valueOf(resNum), UNASSIGNED }); - } - } - } + SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap, + omitNonObserved, nonObservedShiftIndex, pdbeNonObserved); + processSegments(segments, shp); try { populateAtomPositions(entityId, mapping); @@ -558,25 +533,78 @@ public class SiftsClient implements SiftsClientI int pdbStart = UNASSIGNED; int pdbEnd = UNASSIGNED; - Integer[] keys = mapping.keySet().toArray(new Integer[0]); - Arrays.sort(keys); - if (keys.length < 1) + if (mapping.isEmpty()) { - throw new SiftsException(">>> Empty SIFTS mapping generated!!"); + throw new SiftsException("SIFTS mapping failed"); } + // also construct a mapping object between the seq-coord sys and the PDB + // seq's coord sys + + Integer[] keys = mapping.keySet().toArray(new Integer[0]); + Arrays.sort(keys); seqStart = keys[0]; seqEnd = keys[keys.length - 1]; - + List from = new ArrayList<>(), to = new ArrayList<>(); + int[] _cfrom = null, _cto = null; String matchedSeq = originalSeq; - if (seqStart != UNASSIGNED) + if (seqStart != UNASSIGNED) // fixme! seqStart can map to -1 for a pdb + // sequence that starts <-1 { + for (int seqps : keys) + { + int pdbpos = mapping.get(seqps)[PDBE_POS]; + if (pdbpos == UNASSIGNED) + { + // not correct - pdbpos might be -1, but leave it for now + continue; + } + if (_cfrom == null || seqps != _cfrom[1] + 1) + { + _cfrom = new int[] { seqps, seqps }; + from.add(_cfrom); + _cto = null; // discontinuity + } + else + { + _cfrom[1] = seqps; + } + if (_cto == null || pdbpos != 1 + _cto[1]) + { + _cto = new int[] { pdbpos, pdbpos }; + to.add(_cto); + } + else + { + _cto[1] = pdbpos; + } + } + _cfrom = new int[from.size() * 2]; + _cto = new int[to.size() * 2]; + int p = 0; + for (int[] range : from) + { + _cfrom[p++] = range[0]; + _cfrom[p++] = range[1]; + } + ; + p = 0; + for (int[] range : to) + { + _cto[p++] = range[0]; + _cto[p++] = range[1]; + } + ; + + seqFromPdbMapping = new jalview.datamodel.Mapping(null, _cto, _cfrom, + 1, 1); pdbStart = mapping.get(seqStart)[PDB_RES_POS]; pdbEnd = mapping.get(seqEnd)[PDB_RES_POS]; int orignalSeqStart = seq.getStart(); if (orignalSeqStart >= 1) { - int subSeqStart = (seqStart >= orignalSeqStart) ? seqStart - - orignalSeqStart : 0; + int subSeqStart = (seqStart >= orignalSeqStart) + ? seqStart - orignalSeqStart + : 0; int subSeqEnd = seqEnd - (orignalSeqStart - 1); subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length() : subSeqEnd; @@ -597,13 +625,13 @@ public class SiftsClient implements SiftsClientI if (os != null) { MappingOutputPojo mop = new MappingOutputPojo(); - mop.setSeqStart(pdbStart); - mop.setSeqEnd(pdbEnd); + mop.setSeqStart(seqStart); + mop.setSeqEnd(seqEnd); mop.setSeqName(seq.getName()); mop.setSeqResidue(matchedSeq); - mop.setStrStart(seqStart); - mop.setStrEnd(seqEnd); + mop.setStrStart(pdbStart); + mop.setStrEnd(pdbEnd); mop.setStrName(structId); mop.setStrResidue(targetStrucSeqs.toString()); @@ -614,6 +642,145 @@ public class SiftsClient implements SiftsClientI return mapping; } + void processSegments(List segments, SegmentHelperPojo shp) + { + SequenceI seq = shp.getSeq(); + HashMap mapping = shp.getMapping(); + TreeMap resNumMap = shp.getResNumMap(); + List omitNonObserved = shp.getOmitNonObserved(); + int nonObservedShiftIndex = shp.getNonObservedShiftIndex(); + int pdbeNonObservedCount = shp.getPdbeNonObserved(); + int firstPDBResNum = UNASSIGNED; + for (Segment segment : segments) + { + // System.out.println("Mapping segments : " + segment.getSegId() + "\\"s + // + segStartEnd); + List residues = segment.getListResidue().getResidue(); + for (Residue residue : residues) + { + boolean isObserved = isResidueObserved(residue); + int pdbeIndex = getLeadingIntegerValue(residue.getDbResNum(), + UNASSIGNED); + int currSeqIndex = UNASSIGNED; + List cRefDbs = residue.getCrossRefDb(); + CrossRefDb pdbRefDb = null; + for (CrossRefDb cRefDb : cRefDbs) + { + if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB)) + { + pdbRefDb = cRefDb; + if (firstPDBResNum == UNASSIGNED) + { + firstPDBResNum = getLeadingIntegerValue(cRefDb.getDbResNum(), + UNASSIGNED); + } + else + { + if (isObserved) + { + // after we find the first observed residue we just increment + firstPDBResNum++; + } + } + } + if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName()) + && isAccessionMatched(cRefDb.getDbAccessionId())) + { + currSeqIndex = getLeadingIntegerValue(cRefDb.getDbResNum(), + UNASSIGNED); + if (pdbRefDb != null) + { + break;// exit loop if pdb and uniprot are already found + } + } + } + if (!isObserved) + { + ++pdbeNonObservedCount; + } + if (seqCoordSys == seqCoordSys.PDB) // FIXME: is seqCoordSys ever PDBe + // ??? + { + // if the sequence has a primary reference to the PDB, then we are + // dealing with a sequence extracted directly from the PDB. In that + // case, numbering is PDBe - non-observed residues + currSeqIndex = seq.getStart() - 1 + pdbeIndex; + } + if (!isObserved) + { + if (seqCoordSys != CoordinateSys.UNIPROT) // FIXME: PDB or PDBe only + // here + { + // mapping to PDB or PDBe so we need to bookkeep for the + // non-observed + // SEQRES positions + omitNonObserved.add(currSeqIndex); + ++nonObservedShiftIndex; + } + } + if (currSeqIndex == UNASSIGNED) + { + // change in logic - unobserved residues with no currSeqIndex + // corresponding are still counted in both nonObservedShiftIndex and + // pdbeIndex... + continue; + } + // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) // + // true + // numbering + // is + // not + // up + // to + // seq.getEnd() + { + + int resNum = (pdbRefDb == null) + ? getLeadingIntegerValue(residue.getDbResNum(), + UNASSIGNED) + : getLeadingIntegerValue(pdbRefDb.getDbResNum(), + UNASSIGNED); + + if (isObserved) + { + char resCharCode = ResidueProperties + .getSingleCharacterCode(ResidueProperties + .getCanonicalAminoAcid(residue.getDbResName())); + resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); + + int[] mappingcols = new int[] { Integer.valueOf(resNum), + UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED }; + + mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols); + } + } + } + } + } + + /** + * Get the leading integer part of a string that begins with an integer. + * + * @param input + * - the string input to process + * @param failValue + * - value returned if unsuccessful + * @return + */ + static int getLeadingIntegerValue(String input, int failValue) + { + if (input == null) + { + return failValue; + } + String[] parts = input.split("(?=\\D)(?<=\\d)"); + if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+")) + { + return Integer.valueOf(parts[0]); + } + return failValue; + } + /** * * @param chainId @@ -728,14 +895,15 @@ public class SiftsClient implements SiftsClientI { boolean isStrictMatch = true; return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession) - : curDBRefAccessionIdsString.contains(accession.toLowerCase()); + : curDBRefAccessionIdsString + .contains(accession.toLowerCase(Locale.ROOT)); } private boolean isFoundInSiftsEntry(String accessionId) { Set siftsDBRefs = getAllMappingAccession(); return accessionId != null - && siftsDBRefs.contains(accessionId.toLowerCase()); + && siftsDBRefs.contains(accessionId.toLowerCase(Locale.ROOT)); } /** @@ -751,7 +919,7 @@ public class SiftsClient implements SiftsClientI return; } Integer[] keys = resNumMap.keySet().toArray(new Integer[0]); - Arrays.sort(keys); + // Arrays.sort(keys); int firstIndex = keys[0]; int lastIndex = keys[keys.length - 1]; // System.out.println("Min value " + firstIndex); @@ -765,33 +933,11 @@ public class SiftsClient implements SiftsClientI } } - - @Override public Entity getEntityById(String id) throws SiftsException { - // Sometimes SIFTS mappings are wrongly swapped between different chains of - // a PDB entry. This results to wrong mappings being generated. The boolean - // flag 'isGetEntityIdDirectly, determines whether an entity to process is - // determined by a greedy heuristic search or by just matching the Chain Id - // directly against the entity Id tag. Setting the default value to 'false' - // utilise the heuristic search which always produces correct mappings but - // less optimised processing, where as changing the value to 'true' - // optimises performance but might result to incorrect mapping in some cases - // where SIFTS mappings are wrongly swapped between different chains. - // boolean isGetEntityIdDirectly = false; - // if (isGetEntityIdDirectly) - // { - // List entities = siftsEntry.getEntity(); - // for (Entity entity : entities) - // { - // if (!entity.getEntityId().equalsIgnoreCase(id)) - // { - // continue; - // } - // return entity; - // } - // } + // Determines an entity to process by performing a heuristic matching of all + // Entities with the given chainId and choosing the best matching Entity Entity entity = getEntityByMostOptimalMatchedId(id); if (entity != null) { @@ -810,7 +956,8 @@ public class SiftsClient implements SiftsClientI */ public Entity getEntityByMostOptimalMatchedId(String chainId) { - // System.out.println("---> advanced greedy entityId matching block entered.."); + // System.out.println("---> advanced greedy entityId matching block + // entered.."); List entities = siftsEntry.getEntity(); SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()]; int count = 0; @@ -840,7 +987,8 @@ public class SiftsClient implements SiftsClientI } } } - sPojo[count].pid = 100 * (sPojo[count].chainIdFreq / sPojo[count].resCount); + sPojo[count].pid = (100 * sPojo[count].chainIdFreq) + / sPojo[count].resCount; ++count; } Arrays.sort(sPojo, Collections.reverseOrder()); @@ -849,6 +997,10 @@ public class SiftsClient implements SiftsClientI if (sPojo[0].entityId != null) { + if (sPojo[0].pid < 1) + { + return null; + } for (Entity entity : entities) { if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId)) @@ -861,8 +1013,8 @@ public class SiftsClient implements SiftsClientI return null; } - public class SiftsEntitySortPojo implements - Comparable + private class SiftsEntitySortPojo + implements Comparable { public String entityId; @@ -879,9 +1031,103 @@ public class SiftsClient implements SiftsClientI } } + private class SegmentHelperPojo + { + private SequenceI seq; + + private HashMap mapping; + + private TreeMap resNumMap; + + private List omitNonObserved; + + private int nonObservedShiftIndex; + + /** + * count of number of 'not observed' positions in the PDB record's SEQRES + * (total number of residues with coordinates == length(SEQRES) - + * pdbeNonObserved + */ + private int pdbeNonObserved; + + public SegmentHelperPojo(SequenceI seq, HashMap mapping, + TreeMap resNumMap, + List omitNonObserved, int nonObservedShiftIndex, + int pdbeNonObserved) + { + setSeq(seq); + setMapping(mapping); + setResNumMap(resNumMap); + setOmitNonObserved(omitNonObserved); + setNonObservedShiftIndex(nonObservedShiftIndex); + setPdbeNonObserved(pdbeNonObserved); + + } + + public void setPdbeNonObserved(int pdbeNonObserved2) + { + this.pdbeNonObserved = pdbeNonObserved2; + } + + public int getPdbeNonObserved() + { + return pdbeNonObserved; + } + + public SequenceI getSeq() + { + return seq; + } + + public void setSeq(SequenceI seq) + { + this.seq = seq; + } + + public HashMap getMapping() + { + return mapping; + } + + public void setMapping(HashMap mapping) + { + this.mapping = mapping; + } + + public TreeMap getResNumMap() + { + return resNumMap; + } + + public void setResNumMap(TreeMap resNumMap) + { + this.resNumMap = resNumMap; + } + + public List getOmitNonObserved() + { + return omitNonObserved; + } + + public void setOmitNonObserved(List omitNonObserved) + { + this.omitNonObserved = omitNonObserved; + } + + public int getNonObservedShiftIndex() + { + return nonObservedShiftIndex; + } + + public void setNonObservedShiftIndex(int nonObservedShiftIndex) + { + this.nonObservedShiftIndex = nonObservedShiftIndex; + } + + } @Override - public StringBuffer getMappingOutput(MappingOutputPojo mp) + public StringBuilder getMappingOutput(MappingOutputPojo mp) throws SiftsException { String seqRes = mp.getSeqResidue(); @@ -903,10 +1149,10 @@ public class SiftsClient implements SiftsClientI int nochunks = ((seqRes.length()) / len) + ((seqRes.length()) % len > 0 ? 1 : 0); // output mappings - StringBuffer output = new StringBuffer(); + StringBuilder output = new StringBuilder(512); output.append(NEWLINE); - output.append("Sequence \u27f7 Structure mapping details").append( - NEWLINE); + output.append("Sequence \u27f7 Structure mapping details") + .append(NEWLINE); output.append("Method: SIFTS"); output.append(NEWLINE).append(NEWLINE); @@ -924,12 +1170,13 @@ public class SiftsClient implements SiftsClientI output.append(String.valueOf(pdbEnd)); output.append(NEWLINE).append(NEWLINE); + ScoreMatrix pam250 = ScoreModels.getInstance().getPam250(); int matchedSeqCount = 0; for (int j = 0; j < nochunks; j++) { // Print the first aligned sequence - output.append(new Format("%" + (maxid) + "s").form(seqName)).append( - " "); + output.append(new Format("%" + (maxid) + "s").form(seqName)) + .append(" "); for (int i = 0; i < len; i++) { @@ -942,25 +1189,29 @@ public class SiftsClient implements SiftsClientI output.append(NEWLINE); output.append(new Format("%" + (maxid) + "s").form(" ")).append(" "); - // Print out the matching chars + /* + * Print out the match symbols: + * | for exact match (ignoring case) + * . if PAM250 score is positive + * else a space + */ for (int i = 0; i < len; i++) { try { if ((i + (j * len)) < seqRes.length()) { - if (seqRes.charAt(i + (j * len)) == strRes - .charAt(i + (j * len)) - && !jalview.util.Comparison.isGap(seqRes.charAt(i - + (j * len)))) + char c1 = seqRes.charAt(i + (j * len)); + char c2 = strRes.charAt(i + (j * len)); + boolean sameChar = Comparison.isSameResidue(c1, c2, false); + if (sameChar && !Comparison.isGap(c1)) { matchedSeqCount++; output.append("|"); } else if (type.equals("pep")) { - if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)), - strRes.charAt(i + (j * len))) > 0) + if (pam250.getPairwiseScore(c1, c2) > 0) { output.append("."); } @@ -997,8 +1248,8 @@ public class SiftsClient implements SiftsClientI { throw new SiftsException(">>> Low PID detected for SIFTs mapping..."); } - output.append("Length of alignment = " + seqRes.length()).append( - NEWLINE); + output.append("Length of alignment = " + seqRes.length()) + .append(NEWLINE); output.append(new Format("Percentage ID = %2.2f").form(pid)); return output; } @@ -1033,4 +1284,9 @@ public class SiftsClient implements SiftsClientI return siftsEntry.getDbVersion(); } + public static void setMockSiftsFile(File file) + { + mockSiftsFile = file; + } + }