X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fjmol%2FJmolParser.java;h=180da8f449f8044e4f522fb5f758a5dc1b4fd277;hb=2681593fa6810815547b688b89424c837cac1240;hp=668457b4b4cbe7ac2bc2be0302d42aa13be5c827;hpb=e77a693cf000b4ff8a863411acf8c90c6390a061;p=jalview.git

diff --git a/src/jalview/ext/jmol/JmolParser.java b/src/jalview/ext/jmol/JmolParser.java
index 668457b..180da8f 100644
--- a/src/jalview/ext/jmol/JmolParser.java
+++ b/src/jalview/ext/jmol/JmolParser.java
@@ -22,36 +22,27 @@ package jalview.ext.jmol;
 
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.Annotation;
-import jalview.datamodel.DBRefEntry;
-import jalview.datamodel.DBRefSource;
 import jalview.datamodel.PDBEntry;
-import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceI;
 import jalview.io.FileParse;
 import jalview.io.StructureFile;
 import jalview.schemes.ResidueProperties;
-import jalview.util.Comparison;
+import jalview.structure.StructureImportSettings;
+import jalview.util.Format;
 import jalview.util.MessageManager;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Hashtable;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Vector;
 
-import javajs.awt.Dimension;
-
 import org.jmol.api.JmolStatusListener;
 import org.jmol.api.JmolViewer;
 import org.jmol.c.CBK;
 import org.jmol.c.STR;
-import org.jmol.modelset.Group;
-import org.jmol.modelset.Model;
 import org.jmol.modelset.ModelSet;
-import org.jmol.modelsetbio.BioModel;
-import org.jmol.modelsetbio.BioPolymer;
-import org.jmol.modelsetbio.Monomer;
 import org.jmol.viewer.Viewer;
 
 import MCview.Atom;
@@ -68,24 +59,9 @@ public class JmolParser extends StructureFile implements JmolStatusListener
 {
   Viewer viewer = null;
 
-  public JmolParser(boolean addAlignmentAnnotations,
-          boolean predictSecondaryStructure, boolean externalSecStr,
-          String inFile, String type) throws IOException
+  public JmolParser(String inFile, String type) throws IOException
   {
     super(inFile, type);
-    this.visibleChainAnnotation = addAlignmentAnnotations;
-    this.predictSecondaryStructure = predictSecondaryStructure;
-    this.externalSecondaryStructure = externalSecStr;
-  }
-
-  public JmolParser(boolean addAlignmentAnnotations,
-          boolean predictSecondaryStructure, boolean externalSecStr,
-          FileParse fp) throws IOException
-  {
-    super(fp);
-    this.visibleChainAnnotation = addAlignmentAnnotations;
-    this.predictSecondaryStructure = predictSecondaryStructure;
-    this.externalSecondaryStructure = externalSecStr;
   }
 
   public JmolParser(FileParse fp) throws IOException
@@ -93,11 +69,6 @@ public class JmolParser extends StructureFile implements JmolStatusListener
     super(fp);
   }
 
-  public JmolParser(String inFile, String type) throws IOException
-  {
-    super(inFile, type);
-  }
-
   public JmolParser()
   {
   }
@@ -113,7 +84,6 @@ public class JmolParser extends StructureFile implements JmolStatusListener
   @Override
   public void parse() throws IOException
   {
-
     setChains(new Vector<PDBChain>());
     Viewer jmolModel = getJmolData();
     jmolModel.openReader(getDataName(), getDataName(), getReader());
@@ -124,8 +94,19 @@ public class JmolParser extends StructureFile implements JmolStatusListener
      */
     if (jmolModel.ms.mc > 0)
     {
-      parseBiopolymer(jmolModel.ms);
-      // transformJmolModelToJalview(jmolModel.ms);
+      // ideally we do this
+      // try
+      // {
+      // setStructureFileType(jmolModel.evalString("show _fileType"));
+      // } catch (Exception q)
+      // {
+      // }
+      // ;
+      // instead, we distinguish .cif from non-.cif by filename
+      setStructureFileType(getDataName().toLowerCase().endsWith(".cif") ? PDBEntry.Type.MMCIF
+              .toString() : "PDB");
+
+      transformJmolModelToJalview(jmolModel.ms);
     }
   }
 
@@ -140,6 +121,10 @@ public class JmolParser extends StructureFile implements JmolStatusListener
     {
       try
       {
+        /*
+         * params -o (output to sysout) -n (nodisplay) -x (exit when finished)
+         * see http://wiki.jmol.org/index.php/Jmol_Application
+         */
         viewer = (Viewer) JmolViewer.allocateViewer(null, null, null, null,
                 null, "-x -o -n", this);
         // ensure the 'new' (DSSP) not 'old' (Ramachandran) SS method is used
@@ -163,7 +148,17 @@ public class JmolParser extends StructureFile implements JmolStatusListener
       List<SequenceI> prot = new ArrayList<SequenceI>();
       PDBChain tmpchain;
       String pdbId = (String) ms.getInfo(0, "title");
-      setId(pdbId);
+
+      if (pdbId == null)
+      {
+        setId(safeName(getDataName()));
+        setPDBIdAvailable(false);
+      }
+      else
+      {
+        setId(pdbId);
+        setPDBIdAvailable(true);
+      }
       List<Atom> significantAtoms = convertSignificantAtoms(ms);
       for (Atom tmpatom : significantAtoms)
       {
@@ -178,23 +173,20 @@ public class JmolParser extends StructureFile implements JmolStatusListener
           tmpchain.atoms.addElement(tmpatom);
         } catch (Exception e)
         {
-          tmpchain = new PDBChain(pdbId, tmpatom.chain);
+          tmpchain = new PDBChain(getId(), tmpatom.chain);
           getChains().add(tmpchain);
           tmpchain.atoms.addElement(tmpatom);
         }
         lastID = tmpatom.resNumIns.trim();
       }
+      xferSettings();
+
       makeResidueList();
       makeCaBondList();
 
-      if (getId() == null)
-      {
-        setId(inFile.getName());
-      }
       for (PDBChain chain : getChains())
       {
         SequenceI chainseq = postProcessChain(chain);
-        createAnnotation(chainseq, chain, ms.at);
         if (isRNA(chainseq))
         {
           rna.add(chainseq);
@@ -203,6 +195,11 @@ public class JmolParser extends StructureFile implements JmolStatusListener
         {
           prot.add(chainseq);
         }
+
+        if (StructureImportSettings.isProcessSecondaryStructure())
+        {
+          createAnnotation(chainseq, chain, ms.at);
+        }
       }
     } catch (OutOfMemoryError er)
     {
@@ -217,331 +214,124 @@ public class JmolParser extends StructureFile implements JmolStatusListener
   private List<Atom> convertSignificantAtoms(ModelSet ms)
   {
     List<Atom> significantAtoms = new ArrayList<Atom>();
+    HashMap<String, org.jmol.modelset.Atom> chainTerMap = new HashMap<String, org.jmol.modelset.Atom>();
+    org.jmol.modelset.Atom prevAtom = null;
     for (org.jmol.modelset.Atom atom : ms.at)
     {
       if (atom.getAtomName().equalsIgnoreCase("CA")
               || atom.getAtomName().equalsIgnoreCase("P"))
       {
+        if (!atomValidated(atom, prevAtom, chainTerMap))
+        {
+          continue;
+        }
         Atom curAtom = new Atom(atom.x, atom.y, atom.z);
         curAtom.atomIndex = atom.getIndex();
         curAtom.chain = atom.getChainIDStr();
-        curAtom.insCode = atom.group.getInsertionCode();
+        curAtom.insCode = atom.group.getInsertionCode() == '\000' ? ' '
+                : atom.group.getInsertionCode();
         curAtom.name = atom.getAtomName();
         curAtom.number = atom.getAtomNumber();
         curAtom.resName = atom.getGroup3(true);
         curAtom.resNumber = atom.getResno();
-        curAtom.ss = getSecondayStructure(atom.group
-                .getProteinStructureSubType());
         curAtom.occupancy = ms.occupancies != null ? ms.occupancies[atom
                 .getIndex()] : Float.valueOf(atom.getOccupancy100());
-        curAtom.resNumIns = "" + curAtom.resNumber + curAtom.insCode;
-        // curAtom.tfactor = atom.group.;
+        String fmt = new Format("%4i").form(curAtom.resNumber);
+        curAtom.resNumIns = (fmt + curAtom.insCode);
+        curAtom.tfactor = atom.getBfactor100() / 100f;
         curAtom.type = 0;
-        significantAtoms.add(curAtom);
+        // significantAtoms.add(curAtom);
+        // ignore atoms from subsequent models
+        if (!significantAtoms.contains(curAtom))
+        {
+          significantAtoms.add(curAtom);
+        }
+        prevAtom = atom;
       }
     }
     return significantAtoms;
   }
 
-  private void createAnnotation(SequenceI sequence, PDBChain chain,
-          org.jmol.modelset.Atom[] jmolAtoms)
+  private boolean atomValidated(org.jmol.modelset.Atom curAtom,
+          org.jmol.modelset.Atom prevAtom,
+          HashMap<String, org.jmol.modelset.Atom> chainTerMap)
   {
-    char[] secstr = new char[sequence.getLength()];
-    char[] secstrcode = new char[sequence.getLength()];
-    for (Residue residue : chain.residues)
+    // System.out.println("Atom: " + curAtom.getAtomNumber()
+    // + "   Last atom index " + curAtom.group.lastAtomIndex);
+    if (chainTerMap == null || prevAtom == null)
     {
-
+      return true;
     }
-    addSecondaryStructureAnnotation(chain.pdbid, sequence, secstr,
-            secstrcode, chain.id, sequence.getStart());
-  }
-
-  /**
-   * Process the Jmol BioPolymer array and generate a Jalview sequence for each
-   * chain found (including any secondary structure annotation from DSSP)
-   * 
-   * @param ms
-   * @throws IOException
-   */
-  public void parseBiopolymer(ModelSet ms) throws IOException
-  {
-    int modelIndex = -1;
-    for (Model model : ms.am)
+    String curAtomChId = curAtom.getChainIDStr();
+    String prevAtomChId = prevAtom.getChainIDStr();
+    // new chain encountered
+    if (!prevAtomChId.equals(curAtomChId))
     {
-      modelIndex++;
-      String modelTitle = (String) ms.getInfo(modelIndex, "title");
-      /*
-       * Chains can span BioPolymers, so first make a flattened list, and then
-       * work out the lengths of chains present
-       */
-      List<Monomer> monomers = getMonomers(ms, (BioModel) model);
-      List<Integer> chainLengths = getChainLengths(monomers);
-
-      /*
-       * now chop up the Monomer list to make Jalview Sequences
-       */
-      int from = 0;
-      for (int length : chainLengths)
+      // On chain switch add previous chain termination to chainTerMap if absent
+      if (!chainTerMap.containsKey(prevAtomChId))
       {
-        buildSequenceFromChain(monomers.subList(from, from + length),
-                modelTitle);
-        from += length;
+        chainTerMap.put(prevAtomChId, prevAtom);
       }
-    }
-  }
-
-  /**
-   * Returns a flattened list of Monomer (residues) in order, across all
-   * BioPolymers in the model. This simplifies assembling chains which span
-   * BioPolymers. The result omits any alternate residues reported for the same
-   * sequence position (RESNUM value).
-   * 
-   * @param ms
-   * @param model
-   * @return
-   */
-  protected List<Monomer> getMonomers(ModelSet ms, BioModel model)
-  {
-    List<Monomer> result = new ArrayList<Monomer>();
-    int lastResNo = Integer.MIN_VALUE;
-
-    for (BioPolymer bp : model.bioPolymers)
-    {
-      for (int groupLeadAtoms : bp.getLeadAtomIndices())
+      // if current atom belongs to an already terminated chain and the resNum
+      // diff < 5 then mark as valid and update termination Atom
+      if (chainTerMap.containsKey(curAtomChId))
       {
-        Group group = ms.at[groupLeadAtoms].group;
-        if (group instanceof Monomer)
+        if (curAtom.getResno() < chainTerMap.get(curAtomChId).getResno())
         {
-          /*
-           * ignore alternate residue at same position example: 1ejg has
-           * residues A:LEU, B:ILE at RESNUM=25
-           */
-          int resNo = group.getResno();
-          if (lastResNo != resNo)
-          {
-            result.add((Monomer) group);
-          }
-          lastResNo = resNo;
+          return false;
+        }
+        if ((curAtom.getResno() - chainTerMap.get(curAtomChId).getResno()) < 5)
+        {
+          chainTerMap.put(curAtomChId, curAtom);
+          return true;
         }
+        return false;
       }
     }
-    return result;
-  }
-
-  /**
-   * Scans the list of Monomers (residue models), inspecting the chain id for
-   * each, and returns an array whose length is the number of chains, and values
-   * the length of each chain
-   * 
-   * @param monomers
-   * @return
-   */
-  protected List<Integer> getChainLengths(List<Monomer> monomers)
-  {
-    List<Integer> chainLengths = new ArrayList<Integer>();
-    int lastChainId = -1;
-    int length = 0;
-
-    for (Monomer monomer : monomers)
+    // atom with previously terminated chain encountered
+    else if (chainTerMap.containsKey(curAtomChId))
     {
-      int chainId = monomer.chain.chainID;
-      if (chainId != lastChainId && length > 0)
+      if (curAtom.getResno() < chainTerMap.get(curAtomChId).getResno())
       {
-        /*
-         * change of chain - record the length of the last one
-         */
-        chainLengths.add(length);
-        length = 0;
+        return false;
       }
-      lastChainId = chainId;
-      length++;
-    }
-    if (length > 0)
-    {
-      /*
-       * record the length of the final chain
-       */
-      chainLengths.add(length);
-    }
-
-    return chainLengths;
-  }
-
-  /**
-   * Helper method to construct a sequence for one chain and add it to the seqs
-   * list
-   * 
-   * @param monomers
-   *          a list of all monomers in the chain
-   * @param modelTitle
-   */
-  protected void buildSequenceFromChain(List<Monomer> monomers,
-          String modelTitle)
-  {
-    final int length = monomers.size();
-
-    /*
-     * arrays to hold sequence and secondary structure
-     */
-    char[] seq = new char[length];
-    char[] secstr = new char[length];
-    char[] secstrcode = new char[length];
-
-    /*
-     * populate the sequence and secondary structure arrays
-     */
-    extractJmolChainData(monomers, seq, secstr, secstrcode);
-
-    /*
-     * grab chain code and start position from first residue;
-     */
-    String chainId = monomers.get(0).chain.getIDStr();
-    int firstResNum = monomers.get(0).getResno();
-    if (firstResNum < 1)
-    {
-      // Jalview doesn't like residue < 1, so force this to 1
-      System.err.println("Converting chain " + chainId + " first RESNUM ("
-              + firstResNum + ") to 1");
-      firstResNum = 1;
-    }
-
-    /*
-     * convert any non-gap unknown residues to 'X'
-     */
-    convertNonGapCharacters(seq);
-
-    /*
-     * construct and add the Jalview sequence
-     */
-    String seqName = "" + modelTitle + "|" + chainId;
-    int start = firstResNum;
-    int end = firstResNum + length - 1;
-
-    SequenceI sq = new Sequence(seqName, seq, start, end);
-
-    addPdbid(sq, modelTitle, chainId);
-
-    addSourceDBref(sq, modelTitle, start, end);
-
-    seqs.add(sq);
-
-    /*
-     * add secondary structure predictions (if any)
-     */
-      addSecondaryStructureAnnotation(modelTitle, sq, secstr, secstrcode,
-              chainId, firstResNum);
-
-  }
-
-  /**
-   * Scans the list of (Jmol) Monomer objects, and adds the residue for each to
-   * the sequence array, and any converted secondary structure prediction to the
-   * secondary structure arrays
-   * 
-   * @param monomers
-   * @param seq
-   * @param secstr
-   * @param secstrcode
-   */
-  protected void extractJmolChainData(List<Monomer> monomers, char[] seq,
-          char[] secstr, char[] secstrcode)
-  {
-    int pos = 0;
-    for (Monomer monomer : monomers)
-    {
-      seq[pos] = monomer.getGroup1();
-
-      /*
-       * JAL-1828 replace a modified amino acid with its standard equivalent
-       * (e.g. MSE with MET->M) to maximise sequence matching
-       */
-      replaceNonCanonicalResidue(monomer.getGroup3(), seq, pos);
-
-      /*
-       * if Jmol has derived a secondary structure prediction for this position,
-       * convert it to Jalview equivalent and save it
-       */
-      setSecondaryStructure(monomer.getProteinStructureSubType(), pos,
-              secstr, secstrcode);
-      pos++;
-    }
-  }
-
-  /**
-   * Replace any non-gap miscellaneous characters with 'X'
-   * 
-   * @param seq
-   * @return
-   */
-  protected void convertNonGapCharacters(char[] seq)
-  {
-    boolean isNa = Comparison.areNucleotide(new char[][] { seq });
-    int[] cinds = isNa ? ResidueProperties.nucleotideIndex
-            : ResidueProperties.aaIndex;
-    int nonGap = isNa ? ResidueProperties.maxNucleotideIndex
-            : ResidueProperties.maxProteinIndex;
-
-    for (int p = 0; p < seq.length; p++)
-    {
-      if (cinds[seq[p]] == nonGap)
+      if ((curAtom.getResno() - chainTerMap.get(curAtomChId).getResno()) < 5)
       {
-        seq[p] = 'X';
+        chainTerMap.put(curAtomChId, curAtom);
+        return true;
       }
+      return false;
     }
+    // HETATM with resNum jump > 2
+    return !(curAtom.isHetero() && ((curAtom.getResno() - prevAtom
+            .getResno()) > 2));
   }
 
-  /**
-   * Add a source db ref entry for the given sequence.
-   * 
-   * @param sq
-   * @param accessionId
-   * @param start
-   * @param end
-   */
-  protected void addSourceDBref(SequenceI sq, String accessionId,
-          int start, int end)
+  private void createAnnotation(SequenceI sequence, PDBChain chain,
+          org.jmol.modelset.Atom[] jmolAtoms)
   {
-    DBRefEntry sourceDBRef = new DBRefEntry();
-    sourceDBRef.setAccessionId(accessionId);
-    sourceDBRef.setSource(DBRefSource.MMCIF);
-    sourceDBRef.setStartRes(start);
-    sourceDBRef.setEndRes(end);
-    sq.setSourceDBRef(sourceDBRef);
-    sq.addDBRef(sourceDBRef);
-  }
+    char[] secstr = new char[sequence.getLength()];
+    char[] secstrcode = new char[sequence.getLength()];
 
-  /**
-   * Add a PDBEntry giving the source of PDB data to the sequence
-   * 
-   * @param sq
-   * @param id
-   * @param chainId
-   */
-  protected void addPdbid(SequenceI sq, String id, String chainId)
-  {
-    PDBEntry entry = new PDBEntry();
-    entry.setId(id);
-    entry.setType(PDBEntry.Type.MMCIF);
-    entry.setProperty(new Hashtable());
-    if (chainId != null)
-    {
-      // entry.getProperty().put("CHAIN", chains.elementAt(i).id);
-      entry.setChainCode(String.valueOf(chainId));
-    }
-    if (inFile != null)
+    // Ensure Residue size equals Seq size
+    if (chain.residues.size() != sequence.getLength())
     {
-      entry.setFile(inFile.getAbsolutePath());
+      return;
     }
-    else
+    int annotIndex = 0;
+    for (Residue residue : chain.residues)
     {
-      // TODO: decide if we should dump the datasource to disk
-      entry.setFile(getDataName());
+      Atom repAtom = residue.getAtoms().get(0);
+      STR proteinStructureSubType = jmolAtoms[repAtom.atomIndex].group
+              .getProteinStructureSubType();
+      setSecondaryStructure(proteinStructureSubType, annotIndex, secstr,
+              secstrcode);
+      ++annotIndex;
     }
-
-    sq.addPDBId(entry);
+    addSecondaryStructureAnnotation(chain.pdbid, sequence, secstr,
+            secstrcode, chain.id, sequence.getStart());
   }
 
-
   /**
    * Helper method that adds an AlignmentAnnotation for secondary structure to
    * the sequence, provided at least one secondary structure prediction has been
@@ -566,9 +356,15 @@ public class JmolParser extends StructureFile implements JmolStatusListener
     {
       if (secstr[p] >= 'A' && secstr[p] <= 'z')
       {
-        asecstr[p] = new Annotation(String.valueOf(secstr[p]), null,
-                secstrcode[p], Float.NaN);
-        ssFound = true;
+        try
+        {
+          asecstr[p] = new Annotation(String.valueOf(secstr[p]), null,
+                  secstrcode[p], Float.NaN);
+          ssFound = true;
+        } catch (Exception e)
+        {
+          // e.printStackTrace();
+        }
       }
     }
 
@@ -651,24 +447,6 @@ public class JmolParser extends StructureFile implements JmolStatusListener
     }
   }
 
-  private char getSecondayStructure(STR proteinStructureSubType)
-  {
-    switch (proteinStructureSubType)
-    {
-    case HELIX310:
-      return '3';
-    case HELIX:
-    case HELIXALPHA:
-      return 'H';
-    case HELIXPI:
-      return 'P';
-    case SHEET:
-      return 'E';
-    default:
-      return 0;
-    }
-  }
-
   /**
    * Convert any non-standard peptide codes to their standard code table
    * equivalent. (Initial version only does Selenomethionine MSE->MET.)
@@ -846,7 +624,7 @@ public class JmolParser extends StructureFile implements JmolStatusListener
    * Not implemented - returns null
    */
   @Override
-  public Dimension resizeInnerPanel(String data)
+  public int[] resizeInnerPanel(String data)
   {
     return null;
   }
@@ -867,4 +645,14 @@ public class JmolParser extends StructureFile implements JmolStatusListener
     this.predictSecondaryStructure = predictSecondaryStructure;
   }
 
+  public boolean isVisibleChainAnnotation()
+  {
+    return visibleChainAnnotation;
+  }
+
+  public void setVisibleChainAnnotation(boolean visibleChainAnnotation)
+  {
+    this.visibleChainAnnotation = visibleChainAnnotation;
+  }
+
 }