JAL-4369 invokeAndWait to avoid hang whilst calling getFiles if a Jmol redraw is...
[jalview.git] / src / jalview / structure / StructureSelectionManager.java
index 64c1547..0f82650 100644 (file)
@@ -26,10 +26,12 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.Enumeration;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.Set;
 import java.util.Vector;
 
 import jalview.analysis.AlignSeq;
@@ -54,6 +56,7 @@ import jalview.io.AppletFormatAdapter;
 import jalview.io.DataSourceType;
 import jalview.io.StructureFile;
 import jalview.structure.StructureImportSettings.TFType;
+import jalview.util.MapList;
 import jalview.util.MappingUtils;
 import jalview.util.MessageManager;
 import jalview.util.Platform;
@@ -155,16 +158,18 @@ public class StructureSelectionManager
   {
     if (mappings.isEmpty())
     {
-      System.err.println("reportMapping: No PDB/Sequence mappings.");
+      jalview.bin.Console
+              .errPrintln("reportMapping: No PDB/Sequence mappings.");
     }
     else
     {
-      System.err.println(
+      jalview.bin.Console.errPrintln(
               "reportMapping: There are " + mappings.size() + " mappings.");
       int i = 0;
       for (StructureMapping sm : mappings)
       {
-        System.err.println("mapping " + i++ + " : " + sm.pdbfile);
+        jalview.bin.Console
+                .errPrintln("mapping " + i++ + " : " + sm.pdbfile);
       }
     }
   }
@@ -357,6 +362,36 @@ public class StructureSelectionManager
             pdbFile, sourceType, tft, paeFilename, true);
   }
 
+  /**
+   * create sequence structure mappings between each sequence and the given
+   * pdbFile (retrieved via the given protocol). Either constructs a mapping
+   * using NW alignment or derives one from any available SIFTS mapping data.
+   * 
+   * @param forStructureView
+   *          when true, record the mapping for use in mouseOvers
+   * 
+   * @param sequenceArray
+   *          - one or more sequences to be mapped to pdbFile
+   * @param targetChainIds
+   *          - optional chain specification for mapping each sequence to pdb
+   *          (may be null, individual elements may be null) - JBPNote: JAL-2693
+   *          - this should be List<List<String>>, empty lists indicate no
+   *          predefined mappings
+   * @param pdbFile
+   *          - structure data resource
+   * @param sourceType
+   *          - how to resolve data from resource
+   * @param tft
+   *          - specify how to interpret the temperature factor column in the
+   *          atom data
+   * @param paeFilename
+   *          - when not null, specifies a filename containing a matrix
+   *          formatted in JSON using one of the known PAE formats
+   * @param doXferSettings
+   *          - when true, transfer annotation to mapped sequences in
+   *          sequenceArray
+   * @return null or the structure data parsed as a pdb file
+   */
   synchronized public StructureFile setMapping(boolean forStructureView,
           SequenceI[] sequenceArray, String[] targetChainIds,
           String pdbFile, DataSourceType sourceType, TFType tft,
@@ -388,9 +423,15 @@ public class StructureSelectionManager
    * @param IProgressIndicator
    *          reference to UI component that maintains a progress bar for the
    *          mapping operation
-   * @param tft - specify how to interpret the temperature factor column in the atom data
-   * @param paeFilename - when not null, specifies a filename containing a matrix formatted in JSON using one of the known PAE formats
-   * @param doXferSettings - when true, transfer annotation to mapped sequences in sequenceArray 
+   * @param tft
+   *          - specify how to interpret the temperature factor column in the
+   *          atom data
+   * @param paeFilename
+   *          - when not null, specifies a filename containing a matrix
+   *          formatted in JSON using one of the known PAE formats
+   * @param doXferSettings
+   *          - when true, transfer annotation to mapped sequences in
+   *          sequenceArray
    * @return null or the structure data parsed as a pdb file
    */
   synchronized public StructureFile computeMapping(boolean forStructureView,
@@ -483,32 +524,15 @@ public class StructureSelectionManager
       {
         ds = ds.getDatasetSequence();
       }
-
+      List <PDBEntry> putativePDBe = PDBEntryUtils.selectPutativePDBe(seq,ds, pdb); 
+      
       if (targetChainIds != null && targetChainIds[s] != null)
       {
         infChain = false;
         targetChainId = targetChainIds[s];
       }
-      else if (seq.getName().indexOf("|") > -1)
-      {
-        targetChainId = seq.getName()
-                .substring(seq.getName().lastIndexOf("|") + 1);
-        if (targetChainId.length() > 1)
-        {
-          if (targetChainId.trim().length() == 0)
-          {
-            targetChainId = " ";
-          }
-          else
-          {
-            // not a valid chain identifier
-            targetChainId = "";
-          }
-        }
-      }
-      else
-      {
-        targetChainId = "";
+      else {
+        targetChainId = PDBEntryUtils.inferChainId(seq);
       }
 
       /*
@@ -520,6 +544,7 @@ public class StructureSelectionManager
       String maxChainId = " ";
       PDBChain maxChain = null;
       boolean first = true;
+      PDBChain idLengthChain = null;
       for (PDBChain chain : pdb.getChains())
       {
         if (targetChainId.length() > 0 && !targetChainId.equals(chain.id)
@@ -527,15 +552,22 @@ public class StructureSelectionManager
         {
           continue; // don't try to map chains don't match.
         }
+        PDBEntry putativeChain = null;
+        if (!putativePDBe.isEmpty() && (putativeChain = PDBEntryUtils
+                .selectPutativePDBEntry(putativePDBe, chain)) == null)
+        {
+          continue;
+        }
         // TODO: correctly determine sequence type for mixed na/peptide
         // structures
         final String type = chain.isNa ? AlignSeq.DNA : AlignSeq.PEP;
         AlignSeq as = AlignSeq.doGlobalNWAlignment(seq, chain.sequence,
                 type);
-        // equivalent to:
-        // AlignSeq as = new AlignSeq(sequence[s], chain.sequence, type);
-        // as.calcScoreMatrix();
-        // as.traceAlignment();
+        // TODO: JAL-4366 determine if a crummy alignment but exact length match should make this chain the one to be mapped to a 3di sequence
+        if (as.s1str.length() == as.s2str.length())
+        {
+          idLengthChain = chain;
+        }
 
         if (first || as.maxscore > max
                 || (as.maxscore == max && chain.id.equals(targetChainId)))
@@ -551,13 +583,13 @@ public class StructureSelectionManager
       {
         continue;
       }
-
       if (sourceType == DataSourceType.PASTE)
       {
         pdbFile = "INLINE" + pdb.getId();
       }
-
       List<StructureMapping> seqToStrucMapping = new ArrayList<>();
+
+      List<StructureMapping> foundSiftsMappings = new ArrayList<>();
       if (isMapUsingSIFTs && seq.isProtein())
       {
         if (progress != null)
@@ -582,26 +614,15 @@ public class StructureSelectionManager
                     pdb.getId().toLowerCase(Locale.ROOT));
             maxChain.transferResidueAnnotation(siftsMapping, null);
             ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0));
+            foundSiftsMappings.add(siftsMapping);
 
           } catch (SiftsException e)
           {
-            // fall back to NW alignment
             Console.error(e.getMessage());
-            StructureMapping nwMapping = getNWMappings(seq, pdbFile,
-                    targetChainId, maxChain, pdb, maxAlignseq);
-            seqToStrucMapping.add(nwMapping);
-            maxChain.makeExactMapping(maxAlignseq, seq);
-            maxChain.transferRESNUMFeatures(seq, "IEA:Jalview",
-                    pdb.getId().toLowerCase(Locale.ROOT)); // FIXME: is
-            // this
-            // "IEA:Jalview" ?
-            maxChain.transferResidueAnnotation(nwMapping, sqmpping);
-            ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0));
           }
         }
         else
         {
-          List<StructureMapping> foundSiftsMappings = new ArrayList<>();
           for (PDBChain chain : pdb.getChains())
           {
             StructureMapping siftsMapping = null;
@@ -617,45 +638,62 @@ public class StructureSelectionManager
               chain.transferResidueAnnotation(siftsMapping, null);
             } catch (SiftsException e)
             {
-              System.err.println(e.getMessage());
+              jalview.bin.Console.errPrintln(e.getMessage());
             } catch (Exception e)
             {
-              System.err.println(
+              jalview.bin.Console.errPrintln(
                       "Unexpected exception during SIFTS mapping - falling back to NW for this sequence/structure pair");
-              System.err.println(e.getMessage());
+              jalview.bin.Console.errPrintln(e.getMessage());
             }
           }
+          // If SIFTS was successful, add the mapped PDB entry to the dataset sequence
           if (!foundSiftsMappings.isEmpty())
           {
-            seqToStrucMapping.addAll(foundSiftsMappings);
             ds.addPDBId(sqmpping.getTo().getAllPDBEntries().get(0));
           }
-          else
-          {
-            StructureMapping nwMapping = getNWMappings(seq, pdbFile,
-                    maxChainId, maxChain, pdb, maxAlignseq);
-            seqToStrucMapping.add(nwMapping);
-            maxChain.transferRESNUMFeatures(seq, null,
-                    pdb.getId().toLowerCase(Locale.ROOT)); // FIXME: is this
-            // "IEA:Jalview" ?
-            maxChain.transferResidueAnnotation(nwMapping, sqmpping);
-            ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0));
-          }
+        }
+        
+        // If SIFTS was successful, keep its mappings; the identity/NW fallback below is then skipped
+        if (!foundSiftsMappings.isEmpty())
+        {
+          seqToStrucMapping.addAll(foundSiftsMappings);
         }
       }
-      else
+      if (foundSiftsMappings.isEmpty())
       {
-        if (progress != null)
+        // Not doing SIFTS, or SIFTS failed for some reason.
+        
+        // first check if we should use an identity mapping
+        if (idLengthChain != null && maxAlignseq.getS2Coverage() < 0.75)
         {
-          progress.setProgressBar(
-                  MessageManager.getString(
-                          "status.obtaining_mapping_with_nw_alignment"),
-                  progressSessionId);
+          Console.info(
+                  "Assuming 3Dsi identity mapping between structure and sequence");
+          StructureMapping matchMapping = getIdMappings(seq, pdbFile,
+                  idLengthChain.id, idLengthChain, pdb);
+          seqToStrucMapping.add(matchMapping);
+          ds.addPDBId(idLengthChain.sequence.getAllPDBEntries().get(0));
+          Console.info("Mapping added.");
+        }
+        else
+        {
+          if (maxAlignseq.getS1Coverage()<0.15 && maxAlignseq.getS2Coverage()<0.15)
+          {
+            // skip this - the NW alignment is spurious
+            continue;
+          }
+          // Construct a needleman wunsch mapping instead.
+          if (progress != null)
+          {
+            progress.setProgressBar(
+                    MessageManager.getString(
+                            "status.obtaining_mapping_with_nw_alignment"),
+                    progressSessionId);
+          }
+          StructureMapping nwMapping = getNWMappings(seq, pdbFile,
+                  maxChainId, maxChain, pdb, maxAlignseq);
+          seqToStrucMapping.add(nwMapping);
+          ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0));
         }
-        StructureMapping nwMapping = getNWMappings(seq, pdbFile, maxChainId,
-                maxChain, pdb, maxAlignseq);
-        seqToStrucMapping.add(nwMapping);
-        ds.addPDBId(maxChain.sequence.getAllPDBEntries().get(0));
       }
       if (forStructureView)
       {
@@ -757,6 +795,17 @@ public class StructureSelectionManager
     }
     return curChainMapping;
   }
+  
+  /**
+   * construct a mapping based on a pairwise alignment of the sequence and chain
+   * @param seq
+   * @param pdbFile
+   * @param maxChainId
+   * @param maxChain
+   * @param pdb
+   * @param maxAlignseq
+   * @return
+   */
 
   private StructureMapping getNWMappings(SequenceI seq, String pdbFile,
           String maxChainId, PDBChain maxChain, StructureFile pdb,
@@ -838,6 +887,82 @@ public class StructureSelectionManager
     maxChain.transferResidueAnnotation(nwMapping, sqmpping);
     return nwMapping;
   }
+  
+  /**
+   * Construct a 1:1 mapping over the numbering range shared by seq and the chain's sequence.
+   * @param seq sequence to map; passed to the chain's mapChainWith and transferRESNUMFeatures
+   * @param pdbFile structure file reference stored in the returned StructureMapping
+   * @param identityChainId chain id stored in the returned StructureMapping
+   * @param identityChain chain whose sequence, residues and atoms are used to build the mapping
+   * @param pdb parsed structure; its id is passed to transferRESNUMFeatures and the mapping
+   * @return the new StructureMapping, after transferResidueAnnotation has been called with it
+   */
+
+  private StructureMapping getIdMappings(SequenceI seq, String pdbFile,
+          String identityChainId, PDBChain identityChain, StructureFile pdb)
+  {
+    final StringBuilder mappingDetails = new StringBuilder(128);
+    mappingDetails.append(NEWLINE)
+            .append("Sequence \u27f7 Structure mapping details");
+    mappingDetails.append(NEWLINE);
+    mappingDetails.append("Method: Matching length 1:1");
+    mappingDetails.append(NEWLINE).append("PDB Sequence is :")
+            .append(NEWLINE).append("Sequence = ")
+            .append(identityChain.sequence.getSequenceAsString());
+    mappingDetails.append(NEWLINE).append("No of residues = ")
+            .append(identityChain.residues.size()).append(NEWLINE)
+            .append(NEWLINE);
+
+    mappingDetails.append(NEWLINE)
+            .append("Aligned Sequence is: " + seq.getDisplayId(true));
+    mappingDetails.append(NEWLINE)
+            .append("Sequence = " + seq.getSequenceAsString());
+
+    int from = Math.max(seq.getStart(),identityChain.sequence.getStart()); // later of the two starts
+    int to = Math.min(seq.getEnd(), identityChain.sequence.getEnd()); // earlier of the two ends
+    jalview.datamodel.Mapping sqmpping = new jalview.datamodel.Mapping(seq,
+            new MapList(new int[]
+            { from,to },
+                    new int[]
+                    { from,to },
+                    1, 1));
+    identityChain.mapChainWith(sqmpping, seq);
+
+    identityChain.transferRESNUMFeatures(seq, null,
+            pdb.getId().toLowerCase(Locale.ROOT));
+
+    // Construct mapping: (alignmentMapping + 1) -> { residue number, atom index }
+    // TODO REFACTOR TO PDBChain as a builder
+    HashMap<Integer, int[]> mapping = new HashMap<>();
+    int resNum = -10000; // presumably a sentinel that matches no real residue number - TODO confirm
+    int index = 0;
+    char insCode = ' ';
+
+    do // NOTE(review): reads atoms.elementAt(0) before any size check; assumes at least one atom
+    {
+      Atom tmp = identityChain.atoms.elementAt(index);
+      if ((resNum != tmp.resNumber || insCode != tmp.insCode)
+              && tmp.alignmentMapping != -1)
+      {
+        resNum = tmp.resNumber;
+        insCode = tmp.insCode;
+        if (tmp.alignmentMapping >= -1) // combined with the != -1 test above: alignmentMapping > -1
+        {
+          mapping.put(tmp.alignmentMapping + 1,
+                  new int[]
+                  { tmp.resNumber, tmp.atomIndex });
+        }
+      }
+
+      index++;
+    } while (index < identityChain.atoms.size());
+
+    StructureMapping idMapping = new StructureMapping(seq, pdbFile,
+            pdb.getId(), identityChainId, mapping,
+            mappingDetails.toString());
+    identityChain.transferResidueAnnotation(idMapping, sqmpping);
+    return idMapping;
+  }
 
   public void removeStructureViewerListener(Object svl, String[] pdbfiles)
   {
@@ -1025,7 +1150,7 @@ public class StructureSelectionManager
           int indexpos = sm.getSeqPos(atom.getPdbResNum());
           if (lastipos != indexpos || lastseq != sm.sequence)
           {
-            results.addResult(sm.sequence, indexpos, indexpos);
+            results.appendResult(sm.sequence, indexpos, indexpos);
             lastipos = indexpos;
             lastseq = sm.sequence;
             // construct highlighted sequence list
@@ -1254,7 +1379,7 @@ public class StructureSelectionManager
      * 
      * if (mappings[j].sequence == seq && mappings[j].getPdbId().equals(pdbid)
      * && mappings[j].pdbfile.equals(sl.getPdbFile())) {
-     * System.out.println(pdbid+" "+mappings[j].getPdbId() +"
+     * jalview.bin.Console.outPrintln(pdbid+" "+mappings[j].getPdbId() +"
      * "+mappings[j].pdbfile);
      * 
      * java.awt.Color col; for(int index=0; index<seq.getLength(); index++) {
@@ -1351,7 +1476,7 @@ public class StructureSelectionManager
       boolean removed = seqmappings.remove(acf);
       if (removed && seqmappings.isEmpty())
       { // debug
-        System.out.println("All mappings removed");
+        jalview.bin.Console.outPrintln("All mappings removed");
       }
     }
   }