X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=68af7c3f570f36ca3b9ce35b39eb9ee9c89b57f0;hb=bd6ce8f5f9fc8e5bc8a6188d15987ce0ffd2c1ee;hp=6c94723de79a257be3b263d5fb2a4ba415ed15f5;hpb=7c621eb034de40a389393ae4a97762328f5b3046;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 6c94723..68af7c3 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -21,6 +21,8 @@ package jalview.ws.sifts; import jalview.analysis.AlignSeq; +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; import jalview.api.DBRefEntryI; import jalview.api.SiftsClientI; import jalview.datamodel.DBRefEntry; @@ -29,6 +31,8 @@ import jalview.datamodel.SequenceI; import jalview.io.StructureFile; import jalview.schemes.ResidueProperties; import jalview.structure.StructureMapping; +import jalview.util.Comparison; +import jalview.util.DBRefUtils; import jalview.util.Format; import jalview.xml.binding.sifts.Entry; import jalview.xml.binding.sifts.Entry.Entity; @@ -72,6 +76,12 @@ import MCview.PDBChain; public class SiftsClient implements SiftsClientI { + /* + * for use in mocking out file fetch for tests only + * - reset to null after testing! + */ + private static File mockSiftsFile; + private Entry siftsEntry; private StructureFile pdb; @@ -118,8 +128,8 @@ public class SiftsClient implements SiftsClientI private enum ResidueDetailType { - NAME_SEC_STRUCTURE("nameSecondaryStructure"), CODE_SEC_STRUCTURE( - "codeSecondaryStructure"), ANNOTATION("Annotation"); + NAME_SEC_STRUCTURE("nameSecondaryStructure"), + CODE_SEC_STRUCTURE("codeSecondaryStructure"), ANNOTATION("Annotation"); private String code; private ResidueDetailType(String code) @@ -148,7 +158,6 @@ public class SiftsClient implements SiftsClientI siftsEntry = parseSIFTs(siftsFile); } - /** * Parse the given SIFTs File and return a JAXB POJO of parsed data * @@ -186,6 +195,14 @@ public class SiftsClient implements SiftsClientI */ public static File getSiftsFile(String pdbId) throws SiftsException { + /* + * return mocked file if it has been set + */ + if (mockSiftsFile != null) + { + return mockSiftsFile; + } + String siftsFileName = SiftsSettings.getSiftDownloadDirectory() + pdbId.toLowerCase() + ".xml.gz"; File siftsFile = new File(siftsFileName); @@ -211,6 +228,10 @@ public class SiftsClient implements SiftsClientI return new File(siftsFileName); } } + else + { + return siftsFile; + } } try { @@ -240,8 +261,9 @@ public class SiftsClient implements SiftsClientI try { attr = Files.readAttributes(filePath, BasicFileAttributes.class); - diffInDays = (int) ((new Date().getTime() - attr.lastModifiedTime() - .toMillis()) / (1000 * 60 * 60 * 24)); + diffInDays = (int) ((new Date().getTime() + - attr.lastModifiedTime().toMillis()) + / (1000 * 60 * 60 * 24)); // System.out.println("Diff in days : " + diffInDays); } catch (IOException e) { @@ -258,8 +280,8 @@ public class SiftsClient implements SiftsClientI * @throws SiftsException * @throws IOException */ - public static File downloadSiftsFile(String pdbId) throws SiftsException, - IOException + public static File downloadSiftsFile(String pdbId) + throws SiftsException, IOException { if (pdbId.contains(".cif")) { @@ -275,21 +297,23 @@ public class SiftsClient implements SiftsClientI { siftsDownloadDir.mkdirs(); } - // System.out.println(">> Download ftp url : " + siftsFileFTPURL); - URL url = new URL(siftsFileFTPURL); - URLConnection conn = url.openConnection(); - InputStream inputStream = conn.getInputStream(); - FileOutputStream outputStream = new FileOutputStream( - downloadedSiftsFile); - byte[] buffer = new byte[BUFFER_SIZE]; - int bytesRead = -1; - while ((bytesRead = inputStream.read(buffer)) != -1) - { - outputStream.write(buffer, 0, bytesRead); - } - outputStream.close(); - inputStream.close(); - // System.out.println(">>> File downloaded : " + downloadedSiftsFile); + // System.out.println(">> Download ftp url : " + siftsFileFTPURL); + // long now = System.currentTimeMillis(); + URL url = new URL(siftsFileFTPURL); + URLConnection conn = url.openConnection(); + InputStream inputStream = conn.getInputStream(); + FileOutputStream outputStream = new FileOutputStream( + downloadedSiftsFile); + byte[] buffer = new byte[BUFFER_SIZE]; + int bytesRead = -1; + while ((bytesRead = inputStream.read(buffer)) != -1) + { + outputStream.write(buffer, 0, bytesRead); + } + outputStream.close(); + inputStream.close(); + // System.out.println(">>> File downloaded : " + downloadedSiftsFile + // + " took " + (System.currentTimeMillis() - now) + "ms"); return new File(downloadedSiftsFile); } @@ -323,41 +347,29 @@ public class SiftsClient implements SiftsClientI public DBRefEntryI getValidSourceDBRef(SequenceI seq) throws SiftsException { - DBRefEntryI sourceDBRef = null; - sourceDBRef = seq.getSourceDBRef(); - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) + List dbRefs = seq.getPrimaryDBRefs(); + if (dbRefs == null || dbRefs.size() < 1) { - return sourceDBRef; + throw new SiftsException( + "Source DBRef could not be determined. DBRefs might not have been retrieved."); } - else + + for (DBRefEntry dbRef : dbRefs) { - DBRefEntry[] dbRefs = seq.getDBRefs(); - if (dbRefs == null || dbRefs.length < 1) + if (dbRef == null || dbRef.getAccessionId() == null + || dbRef.getSource() == null) { - throw new SiftsException( - "Source DBRef could not be determined. DBRefs might not have been retrieved."); + continue; } - - for (DBRefEntryI dbRef : dbRefs) + String canonicalSource = DBRefUtils + .getCanonicalName(dbRef.getSource()); + if (isValidDBRefEntry(dbRef) + && (canonicalSource.equalsIgnoreCase(DBRefSource.UNIPROT) + || canonicalSource.equalsIgnoreCase(DBRefSource.PDB))) { - if (dbRef == null || dbRef.getAccessionId() == null - || dbRef.getSource() == null) - { - continue; - } - if (isFoundInSiftsEntry(dbRef.getAccessionId()) - && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef - .getSource().equalsIgnoreCase(DBRefSource.PDB))) - { - seq.setSourceDBRef(dbRef); - return dbRef; - } + return dbRef; } } - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) - { - return sourceDBRef; - } throw new SiftsException("Could not get source DB Ref"); } @@ -402,8 +414,8 @@ public class SiftsClient implements SiftsClientI String pdbFile, String chain) throws SiftsException { structId = (chain == null) ? pdbId : pdbId + "|" + chain; - System.out.println("Getting mapping for: " + pdbId + "|" + chain - + " : seq- " + seq.getName()); + System.out.println("Getting SIFTS mapping for " + structId + ": seq " + + seq.getName()); final StringBuilder mappingDetails = new StringBuilder(128); PrintStream ps = new PrintStream(System.out) @@ -440,7 +452,7 @@ public class SiftsClient implements SiftsClientI String originalSeq = AlignSeq.extractGaps( jalview.util.Comparison.GapChars, seq.getSequenceAsString()); HashMap mapping = new HashMap(); - DBRefEntryI sourceDBRef = seq.getSourceDBRef(); + DBRefEntryI sourceDBRef; sourceDBRef = getValidSourceDBRef(seq); // TODO ensure sequence start/end is in the same coordinate system and // consistent with the choosen sourceDBRef @@ -482,12 +494,13 @@ public class SiftsClient implements SiftsClientI int pdbStart = UNASSIGNED; int pdbEnd = UNASSIGNED; - Integer[] keys = mapping.keySet().toArray(new Integer[0]); - Arrays.sort(keys); - if (keys.length < 1) + if (mapping.isEmpty()) { - throw new SiftsException(">>> Empty SIFTS mapping generated!!"); + throw new SiftsException("SIFTS mapping failed"); } + + Integer[] keys = mapping.keySet().toArray(new Integer[0]); + Arrays.sort(keys); seqStart = keys[0]; seqEnd = keys[keys.length - 1]; @@ -499,8 +512,9 @@ public class SiftsClient implements SiftsClientI int orignalSeqStart = seq.getStart(); if (orignalSeqStart >= 1) { - int subSeqStart = (seqStart >= orignalSeqStart) ? seqStart - - orignalSeqStart : 0; + int subSeqStart = (seqStart >= orignalSeqStart) + ? seqStart - orignalSeqStart + : 0; int subSeqEnd = seqEnd - (orignalSeqStart - 1); subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length() : subSeqEnd; @@ -521,13 +535,13 @@ public class SiftsClient implements SiftsClientI if (os != null) { MappingOutputPojo mop = new MappingOutputPojo(); - mop.setSeqStart(pdbStart); - mop.setSeqEnd(pdbEnd); + mop.setSeqStart(seqStart); + mop.setSeqEnd(seqEnd); mop.setSeqName(seq.getName()); mop.setSeqResidue(matchedSeq); - mop.setStrStart(seqStart); - mop.setStrEnd(seqEnd); + mop.setStrStart(pdbStart); + mop.setStrEnd(pdbEnd); mop.setStrName(structId); mop.setStrResidue(targetStrucSeqs.toString()); @@ -561,22 +575,11 @@ public class SiftsClient implements SiftsClientI { pdbRefDb = cRefDb; } - if (cRefDb.getDbCoordSys() - .equalsIgnoreCase(seqCoordSys.getName()) + if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName()) && isAccessionMatched(cRefDb.getDbAccessionId())) { - String resNumIndexString = cRefDb.getDbResNum() - .equalsIgnoreCase("None") ? String.valueOf(UNASSIGNED) - : cRefDb.getDbResNum(); - try - { - currSeqIndex = Integer.valueOf(resNumIndexString); - } catch (NumberFormatException nfe) - { - currSeqIndex = Integer.valueOf(resNumIndexString - .split("[a-zA-Z]")[0]); - continue; - } + currSeqIndex = getLeadingIntegerValue(cRefDb.getDbResNum(), + UNASSIGNED); if (pdbRefDb != null) { break;// exit loop if pdb and uniprot are already found @@ -589,19 +592,12 @@ public class SiftsClient implements SiftsClientI } if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd()) { - int resNum; - try - { - resNum = (pdbRefDb == null) ? Integer.valueOf(residue - .getDbResNum()) : Integer.valueOf(pdbRefDb - .getDbResNum()); - } catch (NumberFormatException nfe) - { - resNum = (pdbRefDb == null) ? Integer.valueOf(residue - .getDbResNum()) : Integer.valueOf(pdbRefDb - .getDbResNum().split("[a-zA-Z]")[0]); - continue; - } + + int resNum = (pdbRefDb == null) + ? getLeadingIntegerValue(residue.getDbResNum(), + UNASSIGNED) + : getLeadingIntegerValue(pdbRefDb.getDbResNum(), + UNASSIGNED); if (isResidueObserved(residue) || seqCoordSys == CoordinateSys.UNIPROT) @@ -616,12 +612,37 @@ public class SiftsClient implements SiftsClientI omitNonObserved.add(currSeqIndex); ++nonObservedShiftIndex; } - mapping.put(currSeqIndex - nonObservedShiftIndex, new int[] { - Integer.valueOf(resNum), UNASSIGNED }); + mapping.put(currSeqIndex - nonObservedShiftIndex, + new int[] + { Integer.valueOf(resNum), UNASSIGNED }); } } } } + + /** + * Get the leading integer part of a string that begins with an integer. + * + * @param input + * - the string input to process + * @param failValue + * - value returned if unsuccessful + * @return + */ + static int getLeadingIntegerValue(String input, int failValue) + { + if (input == null) + { + return failValue; + } + String[] parts = input.split("(?=\\D)(?<=\\d)"); + if (parts != null && parts.length > 0 && parts[0].matches("[0-9]+")) + { + return Integer.valueOf(parts[0]); + } + return failValue; + } + /** * * @param chainId @@ -773,8 +794,6 @@ public class SiftsClient implements SiftsClientI } } - - @Override public Entity getEntityById(String id) throws SiftsException { @@ -798,7 +817,8 @@ public class SiftsClient implements SiftsClientI */ public Entity getEntityByMostOptimalMatchedId(String chainId) { - // System.out.println("---> advanced greedy entityId matching block entered.."); + // System.out.println("---> advanced greedy entityId matching block + // entered.."); List entities = siftsEntry.getEntity(); SiftsEntitySortPojo[] sPojo = new SiftsEntitySortPojo[entities.size()]; int count = 0; @@ -838,6 +858,10 @@ public class SiftsClient implements SiftsClientI if (sPojo[0].entityId != null) { + if (sPojo[0].pid < 1) + { + return null; + } for (Entity entity : entities) { if (!entity.getEntityId().equalsIgnoreCase(sPojo[0].entityId)) @@ -850,8 +874,8 @@ public class SiftsClient implements SiftsClientI return null; } - private class SiftsEntitySortPojo implements - Comparable + private class SiftsEntitySortPojo + implements Comparable { public String entityId; @@ -880,8 +904,7 @@ public class SiftsClient implements SiftsClientI private int nonObservedShiftIndex; - public SegmentHelperPojo(SequenceI seq, - HashMap mapping, + public SegmentHelperPojo(SequenceI seq, HashMap mapping, TreeMap resNumMap, List omitNonObserved, int nonObservedShiftIndex) { @@ -944,7 +967,7 @@ public class SiftsClient implements SiftsClientI } @Override - public StringBuffer getMappingOutput(MappingOutputPojo mp) + public StringBuilder getMappingOutput(MappingOutputPojo mp) throws SiftsException { String seqRes = mp.getSeqResidue(); @@ -966,10 +989,10 @@ public class SiftsClient implements SiftsClientI int nochunks = ((seqRes.length()) / len) + ((seqRes.length()) % len > 0 ? 1 : 0); // output mappings - StringBuffer output = new StringBuffer(); + StringBuilder output = new StringBuilder(512); output.append(NEWLINE); - output.append("Sequence \u27f7 Structure mapping details").append( - NEWLINE); + output.append("Sequence \u27f7 Structure mapping details") + .append(NEWLINE); output.append("Method: SIFTS"); output.append(NEWLINE).append(NEWLINE); @@ -987,12 +1010,13 @@ public class SiftsClient implements SiftsClientI output.append(String.valueOf(pdbEnd)); output.append(NEWLINE).append(NEWLINE); + ScoreMatrix pam250 = ScoreModels.getInstance().getPam250(); int matchedSeqCount = 0; for (int j = 0; j < nochunks; j++) { // Print the first aligned sequence - output.append(new Format("%" + (maxid) + "s").form(seqName)).append( - " "); + output.append(new Format("%" + (maxid) + "s").form(seqName)) + .append(" "); for (int i = 0; i < len; i++) { @@ -1005,25 +1029,29 @@ public class SiftsClient implements SiftsClientI output.append(NEWLINE); output.append(new Format("%" + (maxid) + "s").form(" ")).append(" "); - // Print out the matching chars + /* + * Print out the match symbols: + * | for exact match (ignoring case) + * . if PAM250 score is positive + * else a space + */ for (int i = 0; i < len; i++) { try { if ((i + (j * len)) < seqRes.length()) { - if (seqRes.charAt(i + (j * len)) == strRes - .charAt(i + (j * len)) - && !jalview.util.Comparison.isGap(seqRes.charAt(i - + (j * len)))) + char c1 = seqRes.charAt(i + (j * len)); + char c2 = strRes.charAt(i + (j * len)); + boolean sameChar = Comparison.isSameResidue(c1, c2, false); + if (sameChar && !Comparison.isGap(c1)) { matchedSeqCount++; output.append("|"); } else if (type.equals("pep")) { - if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)), - strRes.charAt(i + (j * len))) > 0) + if (pam250.getPairwiseScore(c1, c2) > 0) { output.append("."); } @@ -1060,8 +1088,8 @@ public class SiftsClient implements SiftsClientI { throw new SiftsException(">>> Low PID detected for SIFTs mapping..."); } - output.append("Length of alignment = " + seqRes.length()).append( - NEWLINE); + output.append("Length of alignment = " + seqRes.length()) + .append(NEWLINE); output.append(new Format("Percentage ID = %2.2f").form(pid)); return output; } @@ -1096,4 +1124,9 @@ public class SiftsClient implements SiftsClientI return siftsEntry.getDbVersion(); } + public static void setMockSiftsFile(File file) + { + mockSiftsFile = file; + } + }