JAL-4371 minimal test and move PDBEntryUtils to correct package
[jalview.git] / src / jalview / structure / StructureSelectionManager.java
index ba3ef71..0f82650 100644 (file)
@@ -26,10 +26,12 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.Enumeration;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.Set;
 import java.util.Vector;
 
 import jalview.analysis.AlignSeq;
@@ -42,6 +44,7 @@ import jalview.datamodel.AlignedCodonFrame;
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.Annotation;
+import jalview.datamodel.ContiguousI;
 import jalview.datamodel.HiddenColumns;
 import jalview.datamodel.PDBEntry;
 import jalview.datamodel.SearchResults;
@@ -52,6 +55,8 @@ import jalview.gui.IProgressIndicator;
 import jalview.io.AppletFormatAdapter;
 import jalview.io.DataSourceType;
 import jalview.io.StructureFile;
+import jalview.structure.StructureImportSettings.TFType;
+import jalview.util.MapList;
 import jalview.util.MappingUtils;
 import jalview.util.MessageManager;
 import jalview.util.Platform;
@@ -153,16 +158,18 @@ public class StructureSelectionManager
   {
     if (mappings.isEmpty())
     {
-      System.err.println("reportMapping: No PDB/Sequence mappings.");
+      jalview.bin.Console
+              .errPrintln("reportMapping: No PDB/Sequence mappings.");
     }
     else
     {
-      System.err.println(
+      jalview.bin.Console.errPrintln(
               "reportMapping: There are " + mappings.size() + " mappings.");
       int i = 0;
       for (StructureMapping sm : mappings)
       {
-        System.err.println("mapping " + i++ + " : " + sm.pdbfile);
+        jalview.bin.Console
+                .errPrintln("mapping " + i++ + " : " + sm.pdbfile);
       }
     }
   }
@@ -325,7 +332,7 @@ public class StructureSelectionManager
           IProgressIndicator progress)
   {
     return computeMapping(true, sequence, targetChains, pdbFile, protocol,
-            progress);
+            progress, null, null, true);
   }
 
   /**
@@ -348,10 +355,50 @@ public class StructureSelectionManager
    */
   synchronized public StructureFile setMapping(boolean forStructureView,
           SequenceI[] sequenceArray, String[] targetChainIds,
-          String pdbFile, DataSourceType sourceType)
+          String pdbFile, DataSourceType sourceType, TFType tft,
+          String paeFilename)
+  {
+    return setMapping(forStructureView, sequenceArray, targetChainIds,
+            pdbFile, sourceType, tft, paeFilename, true);
+  }
+
+  /**
+   * create sequence structure mappings between each sequence and the given
+   * pdbFile (retrieved via the given protocol). Either constructs a mapping
+   * using NW alignment or derives one from any available SIFTS mapping data.
+   * 
+   * @param forStructureView
+   *          when true, record the mapping for use in mouseOvers
+   * 
+   * @param sequenceArray
+   *          - one or more sequences to be mapped to pdbFile
+   * @param targetChainIds
+   *          - optional chain specification for mapping each sequence to pdb
+   *          (may be nill, individual elements may be nill) - JBPNote: JAL-2693
+   *          - this should be List<List<String>>, empty lists indicate no
+   *          predefined mappings
+   * @param pdbFile
+   *          - structure data resource
+   * @param sourceType
+   *          - how to resolve data from resource
+   * @param tft
+   *          - specify how to interpret the temperature factor column in the
+   *          atom data
+   * @param paeFilename
+   *          - when not null, specifies a filename containing a matrix
+   *          formatted in JSON using one of the known PAE formats
+   * @param doXferSettings
+   *          - when true, transfer annotation to mapped sequences in
+   *          sequenceArray
+   * @return null or the structure data parsed as a pdb file
+   */
+  synchronized public StructureFile setMapping(boolean forStructureView,
+          SequenceI[] sequenceArray, String[] targetChainIds,
+          String pdbFile, DataSourceType sourceType, TFType tft,
+          String paeFilename, boolean doXferSettings)
   {
     return computeMapping(forStructureView, sequenceArray, targetChainIds,
-            pdbFile, sourceType, null);
+            pdbFile, sourceType, null, tft, paeFilename, doXferSettings);
   }
 
   /**
@@ -376,12 +423,22 @@ public class StructureSelectionManager
    * @param IProgressIndicator
    *          reference to UI component that maintains a progress bar for the
    *          mapping operation
+   * @param tft
+   *          - specify how to interpret the temperature factor column in the
+   *          atom data
+   * @param paeFilename
+   *          - when not null, specifies a filename containing a matrix
+   *          formatted in JSON using one of the known PAE formats
+   * @param doXferSettings
+   *          - when true, transfer annotation to mapped sequences in
+   *          sequenceArray
    * @return null or the structure data parsed as a pdb file
    */
   synchronized public StructureFile computeMapping(boolean forStructureView,
           SequenceI[] sequenceArray, String[] targetChainIds,
           String pdbFile, DataSourceType sourceType,
-          IProgressIndicator progress)
+          IProgressIndicator progress, TFType tft, String paeFilename,
+          boolean doXferSettings)
   {
     long progressSessionId = System.currentTimeMillis() * 3;
 
@@ -391,8 +448,7 @@ public class StructureSelectionManager
     // FIXME: possibly should just delete
 
     boolean parseSecStr = processSecondaryStructure
-            ? isStructureFileProcessed(pdbFile, sequenceArray)
-            : false;
+            && !isStructureFileProcessed(pdbFile, sequenceArray);
 
     StructureFile pdb = null;
     boolean isMapUsingSIFTs = SiftsSettings.isMapWithSifts();
@@ -401,10 +457,19 @@ public class StructureSelectionManager
       // FIXME if sourceType is not null, we've lost data here
       sourceType = AppletFormatAdapter.checkProtocol(pdbFile);
       pdb = new JmolParser(false, pdbFile, sourceType);
+      if (paeFilename != null)
+      {
+        pdb.setPAEMatrix(paeFilename);
+      }
+      pdb.setTemperatureFactorType(tft);
       pdb.addSettings(parseSecStr && processSecondaryStructure,
               parseSecStr && addTempFacAnnot,
               parseSecStr && secStructServices);
+      // save doXferSettings and reset after doParse()
+      boolean temp = pdb.getDoXferSettings();
+      pdb.setDoXferSettings(doXferSettings);
       pdb.doParse();
+      pdb.setDoXferSettings(temp);
       if (pdb.getId() != null && pdb.getId().trim().length() > 0
               && DataSourceType.FILE == sourceType)
       {
@@ -459,32 +524,15 @@ public class StructureSelectionManager
       {
         ds = ds.getDatasetSequence();
       }
-
+      List <PDBEntry> putativePDBe = PDBEntryUtils.selectPutativePDBe(seq,ds, pdb); 
+      
       if (targetChainIds != null && targetChainIds[s] != null)
       {
         infChain = false;
         targetChainId = targetChainIds[s];
       }
-      else if (seq.getName().indexOf("|") > -1)
-      {
-        targetChainId = seq.getName()
-                .substring(seq.getName().lastIndexOf("|") + 1);
-        if (targetChainId.length() > 1)
-        {
-          if (targetChainId.trim().length() == 0)
-          {
-            targetChainId = " ";
-          }
-          else
-          {
-            // not a valid chain identifier
-            targetChainId = "";
-          }
-        }
-      }
-      else
-      {
-        targetChainId = "";
+      else {
+        targetChainId = PDBEntryUtils.inferChainId(seq);
       }
 
       /*
@@ -496,6 +544,7 @@ public class StructureSelectionManager
       String maxChainId = " ";
       PDBChain maxChain = null;
       boolean first = true;
+      PDBChain idLengthChain = null;
       for (PDBChain chain : pdb.getChains())
       {
         if (targetChainId.length() > 0 && !targetChainId.equals(chain.id)
@@ -503,15 +552,22 @@ public class StructureSelectionManager
         {
           continue; // don't try to map chains don't match.
         }
+        PDBEntry putativeChain = null;
+        if (!putativePDBe.isEmpty() && (putativeChain = PDBEntryUtils
+                .selectPutativePDBEntry(putativePDBe, chain)) == null)
+        {
+          continue;
+        }
         // TODO: correctly determine sequence type for mixed na/peptide
         // structures
         final String type = chain.isNa ? AlignSeq.DNA : AlignSeq.PEP;
         AlignSeq as = AlignSeq.doGlobalNWAlignment(seq, chain.sequence,
                 type);
-        // equivalent to:
-        // AlignSeq as = new AlignSeq(sequence[s], chain.sequence, type);
-        // as.calcScoreMatrix();
-        // as.traceAlignment();
+        // TODO: JAL-4366 determinine of a crummy alignment but exact match should make this chain the one to be mapped to a 3di sequence
+        if (as.s1str.length() == as.s2str.length())
+        {
+          idLengthChain = chain;
+        }
 
         if (first || as.maxscore > max
                 || (as.maxscore == max && chain.id.equals(targetChainId)))
@@ -527,13 +583,13 @@ public class StructureSelectionManager
       {
         continue;
       }
-
       if (sourceType == DataSourceType.PASTE)
       {
         pdbFile = "INLINE" + pdb.getId();
       }
-
       List<StructureMapping> seqToStrucMapping = new ArrayList<>();
+
+      List<StructureMapping> foundSiftsMappings = new ArrayList<>();
       if (isMapUsingSIFTs && seq.isProtein())
       {
         if (progress != null)
@@ -558,26 +614,15 @@ public class StructureSelectionManager
                     pdb.getId().toLowerCase(Locale.ROOT));
             maxChain.transferResidueAnnotation(siftsMapping, null);
             ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0));
+            foundSiftsMappings.add(siftsMapping);
 
           } catch (SiftsException e)
           {
-            // fall back to NW alignment
             Console.error(e.getMessage());
-            StructureMapping nwMapping = getNWMappings(seq, pdbFile,
-                    targetChainId, maxChain, pdb, maxAlignseq);
-            seqToStrucMapping.add(nwMapping);
-            maxChain.makeExactMapping(maxAlignseq, seq);
-            maxChain.transferRESNUMFeatures(seq, "IEA:Jalview",
-                    pdb.getId().toLowerCase(Locale.ROOT)); // FIXME: is
-            // this
-            // "IEA:Jalview" ?
-            maxChain.transferResidueAnnotation(nwMapping, sqmpping);
-            ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0));
           }
         }
         else
         {
-          List<StructureMapping> foundSiftsMappings = new ArrayList<>();
           for (PDBChain chain : pdb.getChains())
           {
             StructureMapping siftsMapping = null;
@@ -593,45 +638,62 @@ public class StructureSelectionManager
               chain.transferResidueAnnotation(siftsMapping, null);
             } catch (SiftsException e)
             {
-              System.err.println(e.getMessage());
+              jalview.bin.Console.errPrintln(e.getMessage());
             } catch (Exception e)
             {
-              System.err.println(
+              jalview.bin.Console.errPrintln(
                       "Unexpected exception during SIFTS mapping - falling back to NW for this sequence/structure pair");
-              System.err.println(e.getMessage());
+              jalview.bin.Console.errPrintln(e.getMessage());
             }
           }
+          // If sifts was successful, add mappings and return
           if (!foundSiftsMappings.isEmpty())
           {
-            seqToStrucMapping.addAll(foundSiftsMappings);
             ds.addPDBId(sqmpping.getTo().getAllPDBEntries().get(0));
           }
-          else
-          {
-            StructureMapping nwMapping = getNWMappings(seq, pdbFile,
-                    maxChainId, maxChain, pdb, maxAlignseq);
-            seqToStrucMapping.add(nwMapping);
-            maxChain.transferRESNUMFeatures(seq, null,
-                    pdb.getId().toLowerCase(Locale.ROOT)); // FIXME: is this
-            // "IEA:Jalview" ?
-            maxChain.transferResidueAnnotation(nwMapping, sqmpping);
-            ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0));
-          }
+        }
+        
+        // If sifts was successful, add mappings and return
+        if (!foundSiftsMappings.isEmpty())
+        {
+          seqToStrucMapping.addAll(foundSiftsMappings);
         }
       }
-      else
+      if (foundSiftsMappings.isEmpty())
       {
-        if (progress != null)
+        // Not doing SIFTS, or SIFTS failed for some reason.
+        
+        // first check if we should use an identity mapping
+        if (idLengthChain != null && maxAlignseq.getS2Coverage() < 0.75)
         {
-          progress.setProgressBar(
-                  MessageManager.getString(
-                          "status.obtaining_mapping_with_nw_alignment"),
-                  progressSessionId);
+          Console.info(
+                  "Assuming 3Dsi identity mapping between structure and sequence");
+          StructureMapping matchMapping = getIdMappings(seq, pdbFile,
+                  idLengthChain.id, idLengthChain, pdb);
+          seqToStrucMapping.add(matchMapping);
+          ds.addPDBId(idLengthChain.sequence.getAllPDBEntries().get(0));
+          Console.info("Mapping added.");
+        }
+        else
+        {
+          if (maxAlignseq.getS1Coverage()<0.15 && maxAlignseq.getS2Coverage()<0.15)
+          {
+            // skip this - the NW alignment is spurious
+            continue;
+          }
+          // Construct a needleman wunsch mapping instead.
+          if (progress != null)
+          {
+            progress.setProgressBar(
+                    MessageManager.getString(
+                            "status.obtaining_mapping_with_nw_alignment"),
+                    progressSessionId);
+          }
+          StructureMapping nwMapping = getNWMappings(seq, pdbFile,
+                  maxChainId, maxChain, pdb, maxAlignseq);
+          seqToStrucMapping.add(nwMapping);
+          ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0));
         }
-        StructureMapping nwMapping = getNWMappings(seq, pdbFile, maxChainId,
-                maxChain, pdb, maxAlignseq);
-        seqToStrucMapping.add(nwMapping);
-        ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0));
       }
       if (forStructureView)
       {
@@ -659,7 +721,7 @@ public class StructureSelectionManager
   private boolean isStructureFileProcessed(String pdbFile,
           SequenceI[] sequenceArray)
   {
-    boolean parseSecStr = true;
+    boolean processed = false;
     if (isPDBFileRegistered(pdbFile))
     {
       for (SequenceI sq : sequenceArray)
@@ -679,13 +741,13 @@ public class StructureSelectionManager
             // passed, not the structure data ID -
             if (PDBfile.isCalcIdForFile(ala, findIdForPDBFile(pdbFile)))
             {
-              parseSecStr = false;
+              processed = true;
             }
           }
         }
       }
     }
-    return parseSecStr;
+    return processed;
   }
 
   public void addStructureMapping(StructureMapping sm)
@@ -733,6 +795,17 @@ public class StructureSelectionManager
     }
     return curChainMapping;
   }
+  
+  /**
+   * construct a mapping based on a pairwise alignment of the sequence and chain
+   * @param seq
+   * @param pdbFile
+   * @param maxChainId
+   * @param maxChain
+   * @param pdb
+   * @param maxAlignseq
+   * @return
+   */
 
   private StructureMapping getNWMappings(SequenceI seq, String pdbFile,
           String maxChainId, PDBChain maxChain, StructureFile pdb,
@@ -814,6 +887,82 @@ public class StructureSelectionManager
     maxChain.transferResidueAnnotation(nwMapping, sqmpping);
     return nwMapping;
   }
+  
+  /**
+   * construct a 1:1 mapping using given residue and sequence numbering 
+   * @param seq
+   * @param pdbFile
+   * @param identityChainId
+   * @param identityChain
+   * @param pdb
+   * @return
+   */
+
+  private StructureMapping getIdMappings(SequenceI seq, String pdbFile,
+          String identityChainId, PDBChain identityChain, StructureFile pdb)
+  {
+    final StringBuilder mappingDetails = new StringBuilder(128);
+    mappingDetails.append(NEWLINE)
+            .append("Sequence \u27f7 Structure mapping details");
+    mappingDetails.append(NEWLINE);
+    mappingDetails.append("Method: Matching length 1:1");
+    mappingDetails.append(NEWLINE).append("PDB Sequence is :")
+            .append(NEWLINE).append("Sequence = ")
+            .append(identityChain.sequence.getSequenceAsString());
+    mappingDetails.append(NEWLINE).append("No of residues = ")
+            .append(identityChain.residues.size()).append(NEWLINE)
+            .append(NEWLINE);
+
+    mappingDetails.append(NEWLINE)
+            .append("Aligned Sequence is: " + seq.getDisplayId(true));
+    mappingDetails.append(NEWLINE)
+            .append("Sequence = " + seq.getSequenceAsString());
+
+    int from = Math.max(seq.getStart(),identityChain.sequence.getStart());
+    int to = Math.min(seq.getEnd(), identityChain.sequence.getEnd());
+    jalview.datamodel.Mapping sqmpping = new jalview.datamodel.Mapping(seq,
+            new MapList(new int[]
+            { from,to },
+                    new int[]
+                    { from,to },
+                    1, 1));
+    identityChain.mapChainWith(sqmpping, seq);
+
+    identityChain.transferRESNUMFeatures(seq, null,
+            pdb.getId().toLowerCase(Locale.ROOT));
+
+    // Construct mapping
+    // TODO REFACTOR TO PDBChain as a builder
+    HashMap<Integer, int[]> mapping = new HashMap<>();
+    int resNum = -10000;
+    int index = 0;
+    char insCode = ' ';
+
+    do
+    {
+      Atom tmp = identityChain.atoms.elementAt(index);
+      if ((resNum != tmp.resNumber || insCode != tmp.insCode)
+              && tmp.alignmentMapping != -1)
+      {
+        resNum = tmp.resNumber;
+        insCode = tmp.insCode;
+        if (tmp.alignmentMapping >= -1)
+        {
+          mapping.put(tmp.alignmentMapping + 1,
+                  new int[]
+                  { tmp.resNumber, tmp.atomIndex });
+        }
+      }
+
+      index++;
+    } while (index < identityChain.atoms.size());
+
+    StructureMapping idMapping = new StructureMapping(seq, pdbFile,
+            pdb.getId(), identityChainId, mapping,
+            mappingDetails.toString());
+    identityChain.transferResidueAnnotation(idMapping, sqmpping);
+    return idMapping;
+  }
 
   public void removeStructureViewerListener(Object svl, String[] pdbfiles)
   {
@@ -1001,7 +1150,7 @@ public class StructureSelectionManager
           int indexpos = sm.getSeqPos(atom.getPdbResNum());
           if (lastipos != indexpos || lastseq != sm.sequence)
           {
-            results.addResult(sm.sequence, indexpos, indexpos);
+            results.appendResult(sm.sequence, indexpos, indexpos);
             lastipos = indexpos;
             lastseq = sm.sequence;
             // construct highlighted sequence list
@@ -1129,6 +1278,62 @@ public class StructureSelectionManager
     sl.highlightAtoms(atoms);
   }
 
+  public void highlightStructureRegionsFor(StructureListener sl,
+          SequenceI[] seqs, int... columns)
+  {
+    List<SequenceI> to_highlight = new ArrayList<SequenceI>();
+    for (SequenceI seq : seqs)
+    {
+      if (sl.isListeningFor(seq))
+      {
+        to_highlight.add(seq);
+      }
+    }
+    if (to_highlight.size() == 0)
+    {
+      return;
+    }
+    List<AtomSpec> atoms = new ArrayList<>();
+    for (SequenceI seq : to_highlight)
+    {
+      int atomNo;
+      for (StructureMapping sm : mappings)
+      {
+        if (sm.sequence == seq || sm.sequence == seq.getDatasetSequence()
+                || (sm.sequence.getDatasetSequence() != null && sm.sequence
+                        .getDatasetSequence() == seq.getDatasetSequence()))
+        {
+
+          for (int i = 0; i < columns.length; i += 2)
+          {
+            ContiguousI positions = seq.findPositions(columns[i] + 1,
+                    columns[i + 1] + 1);
+            if (positions == null)
+            {
+              continue;
+            }
+            for (int index = positions.getBegin(); index <= positions
+                    .getEnd(); index++)
+            {
+
+              atomNo = sm.getAtomNum(index);
+
+              if (atomNo > 0)
+              {
+                atoms.add(new AtomSpec(sm.pdbfile, sm.pdbchain,
+                        sm.getPDBResNum(index), atomNo));
+              }
+            }
+          }
+        }
+      }
+      if (atoms.size() > 0)
+      {
+        sl.highlightAtoms(atoms);
+      }
+    }
+  }
+
   /**
    * true if a mouse over event from an external (ie Vamsas) source is being
    * handled
@@ -1174,7 +1379,7 @@ public class StructureSelectionManager
      * 
      * if (mappings[j].sequence == seq && mappings[j].getPdbId().equals(pdbid)
      * && mappings[j].pdbfile.equals(sl.getPdbFile())) {
-     * System.out.println(pdbid+" "+mappings[j].getPdbId() +"
+     * jalview.bin.Console.outPrintln(pdbid+" "+mappings[j].getPdbId() +"
      * "+mappings[j].pdbfile);
      * 
      * java.awt.Color col; for(int index=0; index<seq.getLength(); index++) {
@@ -1271,7 +1476,7 @@ public class StructureSelectionManager
       boolean removed = seqmappings.remove(acf);
       if (removed && seqmappings.isEmpty())
       { // debug
-        System.out.println("All mappings removed");
+        jalview.bin.Console.outPrintln("All mappings removed");
       }
     }
   }
@@ -1507,4 +1712,44 @@ public class StructureSelectionManager
     return seqmappings;
   }
 
+  /**
+   * quick and dirty route to just highlight all structure positions for a range
+   * of columns
+   * 
+   * @param sequencesArray
+   * @param is
+   *          start-end columns on sequencesArray
+   * @param source
+   *          origin parent AlignmentPanel
+   */
+  public void highlightPositionsOnMany(SequenceI[] sequencesArray, int[] is,
+          Object source)
+  {
+    for (int i = 0; i < listeners.size(); i++)
+    {
+      Object listener = listeners.elementAt(i);
+      if (listener == source)
+      {
+        // TODO listener (e.g. SeqPanel) is never == source (AlignViewport)
+        // Temporary fudge with SequenceListener.getVamsasSource()
+        continue;
+      }
+      if (listener instanceof StructureListener)
+      {
+        highlightStructureRegionsFor((StructureListener) listener,
+                sequencesArray, is);
+      }
+    }
+  }
+
+  public Map<String, String> getPdbFileNameIdMap()
+  {
+    return pdbFileNameId;
+  }
+
+  public Map<String, String> getPdbIdFileNameMap()
+  {
+    return pdbIdFileName;
+  }
+
 }