X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fstructure%2FStructureSelectionManager.java;h=0f82650c2226e3a7acf0e9313fc5a59fbdcf7c8c;hb=9bfda76754fb426a471e1c12c9f0ca0c4c1784bc;hp=24320b54276f0f29539677311bd6a85a6914c0ff;hpb=3a65da0cf59dba22c4e15183db9f336486109538;p=jalview.git diff --git a/src/jalview/structure/StructureSelectionManager.java b/src/jalview/structure/StructureSelectionManager.java index 24320b5..0f82650 100644 --- a/src/jalview/structure/StructureSelectionManager.java +++ b/src/jalview/structure/StructureSelectionManager.java @@ -26,10 +26,12 @@ import java.util.Arrays; import java.util.Collections; import java.util.Enumeration; import java.util.HashMap; +import java.util.HashSet; import java.util.IdentityHashMap; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.Vector; import jalview.analysis.AlignSeq; @@ -53,6 +55,8 @@ import jalview.gui.IProgressIndicator; import jalview.io.AppletFormatAdapter; import jalview.io.DataSourceType; import jalview.io.StructureFile; +import jalview.structure.StructureImportSettings.TFType; +import jalview.util.MapList; import jalview.util.MappingUtils; import jalview.util.MessageManager; import jalview.util.Platform; @@ -154,16 +158,18 @@ public class StructureSelectionManager { if (mappings.isEmpty()) { - System.err.println("reportMapping: No PDB/Sequence mappings."); + jalview.bin.Console + .errPrintln("reportMapping: No PDB/Sequence mappings."); } else { - System.err.println( + jalview.bin.Console.errPrintln( "reportMapping: There are " + mappings.size() + " mappings."); int i = 0; for (StructureMapping sm : mappings) { - System.err.println("mapping " + i++ + " : " + sm.pdbfile); + jalview.bin.Console + .errPrintln("mapping " + i++ + " : " + sm.pdbfile); } } } @@ -326,7 +332,7 @@ public class StructureSelectionManager IProgressIndicator progress) { return computeMapping(true, sequence, targetChains, pdbFile, protocol, - progress); + progress, null, null, true); } /** @@ -349,10 +355,50 @@ public class StructureSelectionManager */ synchronized public StructureFile setMapping(boolean forStructureView, SequenceI[] sequenceArray, String[] targetChainIds, - String pdbFile, DataSourceType sourceType) + String pdbFile, DataSourceType sourceType, TFType tft, + String paeFilename) + { + return setMapping(forStructureView, sequenceArray, targetChainIds, + pdbFile, sourceType, tft, paeFilename, true); + } + + /** + * create sequence structure mappings between each sequence and the given + * pdbFile (retrieved via the given protocol). Either constructs a mapping + * using NW alignment or derives one from any available SIFTS mapping data. + * + * @param forStructureView + * when true, record the mapping for use in mouseOvers + * + * @param sequenceArray + * - one or more sequences to be mapped to pdbFile + * @param targetChainIds + * - optional chain specification for mapping each sequence to pdb + * (may be nill, individual elements may be nill) - JBPNote: JAL-2693 + * - this should be List>, empty lists indicate no + * predefined mappings + * @param pdbFile + * - structure data resource + * @param sourceType + * - how to resolve data from resource + * @param tft + * - specify how to interpret the temperature factor column in the + * atom data + * @param paeFilename + * - when not null, specifies a filename containing a matrix + * formatted in JSON using one of the known PAE formats + * @param doXferSettings + * - when true, transfer annotation to mapped sequences in + * sequenceArray + * @return null or the structure data parsed as a pdb file + */ + synchronized public StructureFile setMapping(boolean forStructureView, + SequenceI[] sequenceArray, String[] targetChainIds, + String pdbFile, DataSourceType sourceType, TFType tft, + String paeFilename, boolean doXferSettings) { return computeMapping(forStructureView, sequenceArray, targetChainIds, - pdbFile, sourceType, null); + pdbFile, sourceType, null, tft, paeFilename, doXferSettings); } /** @@ -377,12 +423,22 @@ public class StructureSelectionManager * @param IProgressIndicator * reference to UI component that maintains a progress bar for the * mapping operation + * @param tft + * - specify how to interpret the temperature factor column in the + * atom data + * @param paeFilename + * - when not null, specifies a filename containing a matrix + * formatted in JSON using one of the known PAE formats + * @param doXferSettings + * - when true, transfer annotation to mapped sequences in + * sequenceArray * @return null or the structure data parsed as a pdb file */ synchronized public StructureFile computeMapping(boolean forStructureView, SequenceI[] sequenceArray, String[] targetChainIds, String pdbFile, DataSourceType sourceType, - IProgressIndicator progress) + IProgressIndicator progress, TFType tft, String paeFilename, + boolean doXferSettings) { long progressSessionId = System.currentTimeMillis() * 3; @@ -392,8 +448,7 @@ public class StructureSelectionManager // FIXME: possibly should just delete boolean parseSecStr = processSecondaryStructure - ? isStructureFileProcessed(pdbFile, sequenceArray) - : false; + && !isStructureFileProcessed(pdbFile, sequenceArray); StructureFile pdb = null; boolean isMapUsingSIFTs = SiftsSettings.isMapWithSifts(); @@ -402,10 +457,19 @@ public class StructureSelectionManager // FIXME if sourceType is not null, we've lost data here sourceType = AppletFormatAdapter.checkProtocol(pdbFile); pdb = new JmolParser(false, pdbFile, sourceType); + if (paeFilename != null) + { + pdb.setPAEMatrix(paeFilename); + } + pdb.setTemperatureFactorType(tft); pdb.addSettings(parseSecStr && processSecondaryStructure, parseSecStr && addTempFacAnnot, parseSecStr && secStructServices); + // save doXferSettings and reset after doParse() + boolean temp = pdb.getDoXferSettings(); + pdb.setDoXferSettings(doXferSettings); pdb.doParse(); + pdb.setDoXferSettings(temp); if (pdb.getId() != null && pdb.getId().trim().length() > 0 && DataSourceType.FILE == sourceType) { @@ -460,32 +524,15 @@ public class StructureSelectionManager { ds = ds.getDatasetSequence(); } - + List putativePDBe = PDBEntryUtils.selectPutativePDBe(seq,ds, pdb); + if (targetChainIds != null && targetChainIds[s] != null) { infChain = false; targetChainId = targetChainIds[s]; } - else if (seq.getName().indexOf("|") > -1) - { - targetChainId = seq.getName() - .substring(seq.getName().lastIndexOf("|") + 1); - if (targetChainId.length() > 1) - { - if (targetChainId.trim().length() == 0) - { - targetChainId = " "; - } - else - { - // not a valid chain identifier - targetChainId = ""; - } - } - } - else - { - targetChainId = ""; + else { + targetChainId = PDBEntryUtils.inferChainId(seq); } /* @@ -497,6 +544,7 @@ public class StructureSelectionManager String maxChainId = " "; PDBChain maxChain = null; boolean first = true; + PDBChain idLengthChain = null; for (PDBChain chain : pdb.getChains()) { if (targetChainId.length() > 0 && !targetChainId.equals(chain.id) @@ -504,15 +552,22 @@ public class StructureSelectionManager { continue; // don't try to map chains don't match. } + PDBEntry putativeChain = null; + if (!putativePDBe.isEmpty() && (putativeChain = PDBEntryUtils + .selectPutativePDBEntry(putativePDBe, chain)) == null) + { + continue; + } // TODO: correctly determine sequence type for mixed na/peptide // structures final String type = chain.isNa ? AlignSeq.DNA : AlignSeq.PEP; AlignSeq as = AlignSeq.doGlobalNWAlignment(seq, chain.sequence, type); - // equivalent to: - // AlignSeq as = new AlignSeq(sequence[s], chain.sequence, type); - // as.calcScoreMatrix(); - // as.traceAlignment(); + // TODO: JAL-4366 determinine of a crummy alignment but exact match should make this chain the one to be mapped to a 3di sequence + if (as.s1str.length() == as.s2str.length()) + { + idLengthChain = chain; + } if (first || as.maxscore > max || (as.maxscore == max && chain.id.equals(targetChainId))) @@ -528,13 +583,13 @@ public class StructureSelectionManager { continue; } - if (sourceType == DataSourceType.PASTE) { pdbFile = "INLINE" + pdb.getId(); } - List seqToStrucMapping = new ArrayList<>(); + + List foundSiftsMappings = new ArrayList<>(); if (isMapUsingSIFTs && seq.isProtein()) { if (progress != null) @@ -559,26 +614,15 @@ public class StructureSelectionManager pdb.getId().toLowerCase(Locale.ROOT)); maxChain.transferResidueAnnotation(siftsMapping, null); ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0)); + foundSiftsMappings.add(siftsMapping); } catch (SiftsException e) { - // fall back to NW alignment Console.error(e.getMessage()); - StructureMapping nwMapping = getNWMappings(seq, pdbFile, - targetChainId, maxChain, pdb, maxAlignseq); - seqToStrucMapping.add(nwMapping); - maxChain.makeExactMapping(maxAlignseq, seq); - maxChain.transferRESNUMFeatures(seq, "IEA:Jalview", - pdb.getId().toLowerCase(Locale.ROOT)); // FIXME: is - // this - // "IEA:Jalview" ? - maxChain.transferResidueAnnotation(nwMapping, sqmpping); - ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0)); } } else { - List foundSiftsMappings = new ArrayList<>(); for (PDBChain chain : pdb.getChains()) { StructureMapping siftsMapping = null; @@ -594,45 +638,62 @@ public class StructureSelectionManager chain.transferResidueAnnotation(siftsMapping, null); } catch (SiftsException e) { - System.err.println(e.getMessage()); + jalview.bin.Console.errPrintln(e.getMessage()); } catch (Exception e) { - System.err.println( + jalview.bin.Console.errPrintln( "Unexpected exception during SIFTS mapping - falling back to NW for this sequence/structure pair"); - System.err.println(e.getMessage()); + jalview.bin.Console.errPrintln(e.getMessage()); } } + // If sifts was successful, add mappings and return if (!foundSiftsMappings.isEmpty()) { - seqToStrucMapping.addAll(foundSiftsMappings); ds.addPDBId(sqmpping.getTo().getAllPDBEntries().get(0)); } - else - { - StructureMapping nwMapping = getNWMappings(seq, pdbFile, - maxChainId, maxChain, pdb, maxAlignseq); - seqToStrucMapping.add(nwMapping); - maxChain.transferRESNUMFeatures(seq, null, - pdb.getId().toLowerCase(Locale.ROOT)); // FIXME: is this - // "IEA:Jalview" ? - maxChain.transferResidueAnnotation(nwMapping, sqmpping); - ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0)); - } + } + + // If sifts was successful, add mappings and return + if (!foundSiftsMappings.isEmpty()) + { + seqToStrucMapping.addAll(foundSiftsMappings); } } - else + if (foundSiftsMappings.isEmpty()) { - if (progress != null) + // Not doing SIFTS, or SIFTS failed for some reason. + + // first check if we should use an identity mapping + if (idLengthChain != null && maxAlignseq.getS2Coverage() < 0.75) { - progress.setProgressBar( - MessageManager.getString( - "status.obtaining_mapping_with_nw_alignment"), - progressSessionId); + Console.info( + "Assuming 3Dsi identity mapping between structure and sequence"); + StructureMapping matchMapping = getIdMappings(seq, pdbFile, + idLengthChain.id, idLengthChain, pdb); + seqToStrucMapping.add(matchMapping); + ds.addPDBId(idLengthChain.sequence.getAllPDBEntries().get(0)); + Console.info("Mapping added."); + } + else + { + if (maxAlignseq.getS1Coverage()<0.15 && maxAlignseq.getS2Coverage()<0.15) + { + // skip this - the NW alignment is spurious + continue; + } + // Construct a needleman wunsch mapping instead. + if (progress != null) + { + progress.setProgressBar( + MessageManager.getString( + "status.obtaining_mapping_with_nw_alignment"), + progressSessionId); + } + StructureMapping nwMapping = getNWMappings(seq, pdbFile, + maxChainId, maxChain, pdb, maxAlignseq); + seqToStrucMapping.add(nwMapping); + ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0)); } - StructureMapping nwMapping = getNWMappings(seq, pdbFile, maxChainId, - maxChain, pdb, maxAlignseq); - seqToStrucMapping.add(nwMapping); - ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0)); } if (forStructureView) { @@ -660,7 +721,7 @@ public class StructureSelectionManager private boolean isStructureFileProcessed(String pdbFile, SequenceI[] sequenceArray) { - boolean parseSecStr = true; + boolean processed = false; if (isPDBFileRegistered(pdbFile)) { for (SequenceI sq : sequenceArray) @@ -680,13 +741,13 @@ public class StructureSelectionManager // passed, not the structure data ID - if (PDBfile.isCalcIdForFile(ala, findIdForPDBFile(pdbFile))) { - parseSecStr = false; + processed = true; } } } } } - return parseSecStr; + return processed; } public void addStructureMapping(StructureMapping sm) @@ -734,6 +795,17 @@ public class StructureSelectionManager } return curChainMapping; } + + /** + * construct a mapping based on a pairwise alignment of the sequence and chain + * @param seq + * @param pdbFile + * @param maxChainId + * @param maxChain + * @param pdb + * @param maxAlignseq + * @return + */ private StructureMapping getNWMappings(SequenceI seq, String pdbFile, String maxChainId, PDBChain maxChain, StructureFile pdb, @@ -815,6 +887,82 @@ public class StructureSelectionManager maxChain.transferResidueAnnotation(nwMapping, sqmpping); return nwMapping; } + + /** + * construct a 1:1 mapping using given residue and sequence numbering + * @param seq + * @param pdbFile + * @param identityChainId + * @param identityChain + * @param pdb + * @return + */ + + private StructureMapping getIdMappings(SequenceI seq, String pdbFile, + String identityChainId, PDBChain identityChain, StructureFile pdb) + { + final StringBuilder mappingDetails = new StringBuilder(128); + mappingDetails.append(NEWLINE) + .append("Sequence \u27f7 Structure mapping details"); + mappingDetails.append(NEWLINE); + mappingDetails.append("Method: Matching length 1:1"); + mappingDetails.append(NEWLINE).append("PDB Sequence is :") + .append(NEWLINE).append("Sequence = ") + .append(identityChain.sequence.getSequenceAsString()); + mappingDetails.append(NEWLINE).append("No of residues = ") + .append(identityChain.residues.size()).append(NEWLINE) + .append(NEWLINE); + + mappingDetails.append(NEWLINE) + .append("Aligned Sequence is: " + seq.getDisplayId(true)); + mappingDetails.append(NEWLINE) + .append("Sequence = " + seq.getSequenceAsString()); + + int from = Math.max(seq.getStart(),identityChain.sequence.getStart()); + int to = Math.min(seq.getEnd(), identityChain.sequence.getEnd()); + jalview.datamodel.Mapping sqmpping = new jalview.datamodel.Mapping(seq, + new MapList(new int[] + { from,to }, + new int[] + { from,to }, + 1, 1)); + identityChain.mapChainWith(sqmpping, seq); + + identityChain.transferRESNUMFeatures(seq, null, + pdb.getId().toLowerCase(Locale.ROOT)); + + // Construct mapping + // TODO REFACTOR TO PDBChain as a builder + HashMap mapping = new HashMap<>(); + int resNum = -10000; + int index = 0; + char insCode = ' '; + + do + { + Atom tmp = identityChain.atoms.elementAt(index); + if ((resNum != tmp.resNumber || insCode != tmp.insCode) + && tmp.alignmentMapping != -1) + { + resNum = tmp.resNumber; + insCode = tmp.insCode; + if (tmp.alignmentMapping >= -1) + { + mapping.put(tmp.alignmentMapping + 1, + new int[] + { tmp.resNumber, tmp.atomIndex }); + } + } + + index++; + } while (index < identityChain.atoms.size()); + + StructureMapping idMapping = new StructureMapping(seq, pdbFile, + pdb.getId(), identityChainId, mapping, + mappingDetails.toString()); + identityChain.transferResidueAnnotation(idMapping, sqmpping); + return idMapping; + } public void removeStructureViewerListener(Object svl, String[] pdbfiles) { @@ -1002,7 +1150,7 @@ public class StructureSelectionManager int indexpos = sm.getSeqPos(atom.getPdbResNum()); if (lastipos != indexpos || lastseq != sm.sequence) { - results.addResult(sm.sequence, indexpos, indexpos); + results.appendResult(sm.sequence, indexpos, indexpos); lastipos = indexpos; lastseq = sm.sequence; // construct highlighted sequence list @@ -1231,7 +1379,7 @@ public class StructureSelectionManager * * if (mappings[j].sequence == seq && mappings[j].getPdbId().equals(pdbid) * && mappings[j].pdbfile.equals(sl.getPdbFile())) { - * System.out.println(pdbid+" "+mappings[j].getPdbId() +" + * jalview.bin.Console.outPrintln(pdbid+" "+mappings[j].getPdbId() +" * "+mappings[j].pdbfile); * * java.awt.Color col; for(int index=0; index getPdbFileNameIdMap() + { + return pdbFileNameId; + } + + public Map getPdbIdFileNameMap() + { + return pdbIdFileName; + } + }