From 49f0437d385ee4c6dbe701180a4ba704da76b5f8 Mon Sep 17 00:00:00 2001 From: tcofoegbu Date: Wed, 11 Nov 2015 11:49:42 +0000 Subject: [PATCH] JAL-1957 JAL-1479 Further optimisation of SIFTs Client and addition of support for multi-chain mapping of uniprot <-> pdb residues --- src/jalview/ext/jmol/JmolCommands.java | 8 +- .../structure/StructureSelectionManager.java | 57 +++++-- src/jalview/ws/sifts/SiftsClient.java | 167 ++++++++++++-------- test/jalview/ws/sifts/SiftsClientTest.java | 2 +- 4 files changed, 155 insertions(+), 79 deletions(-) diff --git a/src/jalview/ext/jmol/JmolCommands.java b/src/jalview/ext/jmol/JmolCommands.java index 19f535c..d5676c5 100644 --- a/src/jalview/ext/jmol/JmolCommands.java +++ b/src/jalview/ext/jmol/JmolCommands.java @@ -64,7 +64,9 @@ public class JmolCommands ArrayList str = new ArrayList(); if (mapping == null || mapping.length < 1) + { continue; + } int lastPos = -1; for (int s = 0; s < sequence[pdbfnum].length; s++) @@ -85,14 +87,18 @@ public class JmolCommands int pos = mapping[m].getPDBResNum(asp.findPosition(r)); if (pos < 1 || pos == lastPos) + { continue; + } lastPos = pos; Color col = sr.getResidueBoxColour(sequence[pdbfnum][s], r); if (fr != null) + { col = fr.findFeatureColour(col, sequence[pdbfnum][s], r); + } String newSelcom = (mapping[m].getChain() != " " ? ":" + mapping[m].getChain() : "") + "/" @@ -125,7 +131,7 @@ public class JmolCommands command.append("select " + pos); command.append(newSelcom); } - break; + // break; } } } diff --git a/src/jalview/structure/StructureSelectionManager.java b/src/jalview/structure/StructureSelectionManager.java index 13aae26..3d5a975 100644 --- a/src/jalview/structure/StructureSelectionManager.java +++ b/src/jalview/structure/StructureSelectionManager.java @@ -326,7 +326,7 @@ public class StructureSelectionManager * @param forStructureView * when true, record the mapping for use in mouseOvers * - * @param sequence + * @param sequenceArray * - one or more sequences to be mapped to pdbFile * @param targetChains * - optional chain specification for mapping each sequence to pdb @@ -338,7 +338,7 @@ public class StructureSelectionManager * @return null or the structure data parsed as a pdb file */ synchronized public PDBfile setMapping(boolean forStructureView, - SequenceI[] sequence, String[] targetChains, String pdbFile, + SequenceI[] sequenceArray, String[] targetChains, String pdbFile, String protocol) { /* @@ -348,7 +348,7 @@ public class StructureSelectionManager boolean parseSecStr = processSecondaryStructure; if (isPDBFileRegistered(pdbFile)) { - for (SequenceI sq : sequence) + for (SequenceI sq : sequenceArray) { SequenceI ds = sq; while (ds.getDatasetSequence() != null) @@ -372,15 +372,25 @@ public class StructureSelectionManager } } PDBfile pdb = null; + boolean isMapUsingSIFTs = Boolean.valueOf(jalview.bin.Cache.getDefault( + "MAP_WITH_SIFTS", "false")); + SiftsClient siftsClient = null; try { pdb = new PDBfile(addTempFacAnnot, parseSecStr, secStructServices, pdbFile, protocol); + if (isMapUsingSIFTs) + { + siftsClient = new SiftsClient(pdb); + } if (pdb.id != null && pdb.id.trim().length() > 0 && AppletFormatAdapter.FILE.equals(protocol)) { registerPDBFile(pdb.id.trim(), pdbFile); } + } catch (SiftsException e) + { + e.printStackTrace(); } catch (Exception ex) { ex.printStackTrace(); @@ -388,10 +398,10 @@ public class StructureSelectionManager } String targetChain; - for (int s = 0; s < sequence.length; s++) + for (int s = 0; s < sequenceArray.length; s++) { boolean infChain = true; - final SequenceI seq = sequence[s]; + final SequenceI seq = sequenceArray[s]; if (targetChains != null && targetChains[s] != null) { infChain = false; @@ -465,16 +475,29 @@ public class StructureSelectionManager pdbFile = "INLINE" + pdb.id; } - StructureMapping seqToStrucMapping = null; - boolean isMapViaSIFTs = Boolean.valueOf(jalview.bin.Cache.getDefault( - "MAP_WITH_SIFTS", "false")); - if (isMapViaSIFTs) + ArrayList seqToStrucMapping = null; + if (isMapUsingSIFTs) { - SiftsClient siftsClient = new SiftsClient(pdb); try { - seqToStrucMapping = siftsClient.getSiftsStructureMapping(seq, - pdbFile, maxChainId); + seqToStrucMapping = new ArrayList(); + if (targetChain != null && !targetChain.trim().isEmpty()) + { + maxChainId = targetChain; + StructureMapping curChainMapping = siftsClient + .getSiftsStructureMapping(seq, pdbFile, targetChain); + seqToStrucMapping.add(curChainMapping); + } + else + { + for (PDBChain chain : pdb.chains) + { + maxChainId = chain.id; + StructureMapping curChainMapping = siftsClient + .getSiftsStructureMapping(seq, pdbFile, chain.id); + seqToStrucMapping.add(curChainMapping); + } + } } catch (SiftsException e) { System.err @@ -492,13 +515,15 @@ public class StructureSelectionManager if (forStructureView) { - mappings.add(seqToStrucMapping); + // mappings.add(seqToStrucMapping); + mappings.addAll(seqToStrucMapping); } } return pdb; } - private StructureMapping getNWMappings(SequenceI seq, String pdbFile, + private ArrayList getNWMappings(SequenceI seq, + String pdbFile, String maxChainId, PDBChain maxChain, PDBfile pdb, AlignSeq maxAlignseq) { @@ -574,7 +599,9 @@ public class StructureSelectionManager StructureMapping nwMapping = new StructureMapping(seq, pdbFile, pdb.id, maxChainId, mapping, mappingDetails.toString()); maxChain.transferResidueAnnotation(nwMapping, sqmpping); - return nwMapping; + ArrayList mappings = new ArrayList(); + mappings.add(nwMapping); + return mappings; } public void removeStructureViewerListener(Object svl, String[] pdbfiles) diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index e054b29..10e14f4 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -78,11 +78,7 @@ public class SiftsClient implements SiftsClientI private String segStartEnd; - private static final String UNIPROT_COORDINATE_SYS = "UniProt"; - - private static final String PDB_COORDINATE_SYS = "PDBresnum"; - - private String seqCoordSys = UNIPROT_COORDINATE_SYS; + private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT; private static final int BUFFER_SIZE = 4096; @@ -104,45 +100,55 @@ public class SiftsClient implements SiftsClientI private final static String NEWLINE = System.lineSeparator(); + private String curSourceDBRef; + + private HashSet curDBRefAccessionIdsString; + + public enum CoordinateSys + { + UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe"); + private String name; + + private CoordinateSys(String name) + { + this.name = name; + } + + public String getName() + { + return name; + } + }; + /** * Fetch SIFTs file for the given PDB Id and construct an instance of * SiftsClient * * @param pdbId + * @throws SiftsException */ - public SiftsClient(PDBfile pdb) + public SiftsClient(PDBfile pdb) throws SiftsException { this.pdb = pdb; this.pdbId = pdb.id; - try - { - File siftsFile = getSiftsFile(pdbId); - siftsEntry = parseSIFTs(siftsFile); - } catch (Exception e) - { - e.printStackTrace(); - } + File siftsFile = getSiftsFile(pdbId); + siftsEntry = parseSIFTs(siftsFile); } /** - * Construct an instance of SiftsClient using the supplied SIFTs file - - * the SIFTs file should correspond to the given PDB Id + * Construct an instance of SiftsClient using the supplied SIFTs file - the + * SIFTs file should correspond to the given PDB Id * * @param pdbId * @param siftsFile + * @throws SiftsException + * @throws Exception */ - public SiftsClient(PDBfile pdb, File siftsFile) + public SiftsClient(PDBfile pdb, File siftsFile) throws SiftsException { this.pdb = pdb; this.pdbId = pdb.id; - try - { - siftsEntry = parseSIFTs(siftsFile); - } catch (Exception e) - { - e.printStackTrace(); - } - + siftsEntry = parseSIFTs(siftsFile); } /** @@ -154,7 +160,7 @@ public class SiftsClient implements SiftsClientI * @throws Exception * if a problem occurs while parsing the SIFTs XML */ - private Entry parseSIFTs(File siftFile) throws Exception + private Entry parseSIFTs(File siftFile) throws SiftsException { try { @@ -182,7 +188,7 @@ public class SiftsClient implements SiftsClientI { e.printStackTrace(); } - throw new Exception("Error parsing siftFile"); + throw new SiftsException("Error parsing siftFile"); } /** @@ -308,8 +314,8 @@ public class SiftsClient implements SiftsClientI continue; } if (isFoundInSiftsEntry(dbRef.getAccessionId()) - && (dbRef.getSource().equalsIgnoreCase("uniprot") || dbRef - .getSource().equalsIgnoreCase("pdb"))) + && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef + .getSource().equalsIgnoreCase(DBRefSource.PDB))) { return dbRef; } @@ -414,14 +420,18 @@ public class SiftsClient implements SiftsClientI // set sequence coordinate system - default value is UniProt if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB)) { - seqCoordSys = PDB_COORDINATE_SYS; + seqCoordSys = CoordinateSys.PDB; } - ArrayList dbRefAccessionIdsString = new ArrayList(); + HashSet dbRefAccessionIdsString = new HashSet(); for (DBRefEntry dbref : seq.getDBRefs()) { - dbRefAccessionIdsString.add(dbref.getAccessionId()); + dbRefAccessionIdsString.add(dbref.getAccessionId().toLowerCase()); } + dbRefAccessionIdsString.add(sourceDBRef.getAccessionId().toLowerCase()); + + curDBRefAccessionIdsString = dbRefAccessionIdsString; + curSourceDBRef = sourceDBRef.getAccessionId(); // initialise all mapping positions to unassigned for (int residuePos[] : mapping) @@ -442,17 +452,25 @@ public class SiftsClient implements SiftsClientI { int currSeqIndex = UNASSIGNED; List cRefDbs = residue.getCrossRefDb(); + CrossRefDb pdbRefDb = null; for (CrossRefDb cRefDb : cRefDbs) { - if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys) - && dbRefAccessionIdsString.contains(cRefDb - .getDbAccessionId())) + if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB)) + { + pdbRefDb = cRefDb; + } + if (cRefDb.getDbCoordSys() + .equalsIgnoreCase(seqCoordSys.getName()) + && hasAccessionId(cRefDb.getDbAccessionId())) { String resNumIndexString = cRefDb.getDbResNum() .equalsIgnoreCase("None") ? String.valueOf(UNASSIGNED) : cRefDb.getDbResNum(); currSeqIndex = Integer.valueOf(resNumIndexString); - break; + if (pdbRefDb != null) + { + break;// exit loop if pdb and uniprot are already found + } } } if (currSeqIndex == UNASSIGNED) @@ -461,8 +479,24 @@ public class SiftsClient implements SiftsClientI } if (currSeqIndex > seq.getStart() && currSeqIndex <= seq.getEnd()) { - int resNum = Integer.valueOf(residue.getDbResNum()); - mapping[currSeqIndex][PDB_RES_POS] = Integer.valueOf(resNum); + int resNum; + try + { + resNum = (pdbRefDb == null) ? Integer.valueOf(residue + .getDbResNum()) : Integer.valueOf(pdbRefDb.getDbResNum()); + } catch (NumberFormatException nfe) + { + resNum = (pdbRefDb == null) ? Integer.valueOf(residue + .getDbResNum()) : Integer.valueOf(pdbRefDb + .getDbResNum().split("[a-zA-Z]")[0]); + } + try + { + mapping[currSeqIndex][PDB_RES_POS] = Integer.valueOf(resNum); + } catch (ArrayIndexOutOfBoundsException e) + { + // do nothing.. + } char resCharCode = ResidueProperties .getSingleCharacterCode(residue.getDbResName()); resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); @@ -478,10 +512,10 @@ public class SiftsClient implements SiftsClientI } padWithGaps(resNumMap); int counter = 0; - int seqStart = 0; - int seqEnd = 0; - int pdbStart = 0; - int pdbEnd = 0; + int seqStart = UNASSIGNED; + int seqEnd = UNASSIGNED; + int pdbStart = UNASSIGNED; + int pdbEnd = UNASSIGNED; boolean startDetected = false; for (int[] x : mapping) { @@ -520,31 +554,32 @@ public class SiftsClient implements SiftsClientI targetStrucSeqs.append(res); } - try + if (os != null) { - if (os != null) - { - MappingOutputPojo mop = new MappingOutputPojo(); - mop.setSeqStart(seqStart); - mop.setSeqEnd(seqEnd); - mop.setSeqName(seq.getName()); - mop.setSeqResidue(matchedSeq); - - mop.setStrStart(pdbStart); - mop.setStrEnd(pdbEnd); - mop.setStrName(structId); - mop.setStrResidue(targetStrucSeqs.toString()); - - mop.setType("pep"); - os.print(getMappingOutput(mop).toString()); - } - } catch (Exception ex) - { - ex.printStackTrace(); + MappingOutputPojo mop = new MappingOutputPojo(); + mop.setSeqStart(seqStart); + mop.setSeqEnd(seqEnd); + mop.setSeqName(seq.getName()); + mop.setSeqResidue(matchedSeq); + + mop.setStrStart(pdbStart); + mop.setStrEnd(pdbEnd); + mop.setStrName(structId); + mop.setStrResidue(targetStrucSeqs.toString()); + + mop.setType("pep"); + os.print(getMappingOutput(mop).toString()); } return mapping; } + private boolean hasAccessionId(String accession) + { + boolean isStrictMatch = true; + return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession) + : curDBRefAccessionIdsString.contains(accession.toLowerCase()); + } + @Override public boolean isFoundInSiftsEntry(String accessionId) { @@ -559,6 +594,10 @@ public class SiftsClient implements SiftsClientI */ void padWithGaps(TreeMap resNumMap) { + if (resNumMap == null || resNumMap.isEmpty()) + { + return; + } Integer[] keys = resNumMap.keySet().toArray(new Integer[0]); Arrays.sort(keys); int firstIndex = keys[0]; @@ -766,6 +805,10 @@ public class SiftsClient implements SiftsClientI output.append(NEWLINE).append(NEWLINE); } float pid = (float) matchedSeqCount / seqRes.length() * 100; + if (pid < 2) + { + throw new SiftsException("Low PID detected for SIFTs mapping..."); + } output.append("Length of alignment = " + seqRes.length()) .append(NEWLINE); output.append(new Format("Percentage ID = %2.2f").form(pid)); diff --git a/test/jalview/ws/sifts/SiftsClientTest.java b/test/jalview/ws/sifts/SiftsClientTest.java index 7443f4a..4c94f66 100644 --- a/test/jalview/ws/sifts/SiftsClientTest.java +++ b/test/jalview/ws/sifts/SiftsClientTest.java @@ -78,7 +78,7 @@ public class SiftsClientTest { 94, u }, { 95, u }, { 96, u }, { 97, u } }; @BeforeTest(alwaysRun = true) - public void setUpSiftsClient() + public void setUpSiftsClient() throws SiftsException { // SIFTs entries are updated weekly - so use saved SIFTs file to enforce // test reproducibility -- 1.7.10.2