From: gmungoc <g.m.carstairs@dundee.ac.uk>
Date: Wed, 2 Dec 2015 16:52:41 +0000 (+0000)
Subject: JAL-1827 refactored to handle missing residues better
X-Git-Tag: Release_2_10_0~331^2~2
X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=d8d34047253ad28418f5478d9185a9b840114072;p=jalview.git

JAL-1827 refactored to handle missing residues better
---

diff --git a/src/jalview/ext/jmol/PDBFileWithJmol.java b/src/jalview/ext/jmol/PDBFileWithJmol.java
index 8d76afc..49cb6d5 100644
--- a/src/jalview/ext/jmol/PDBFileWithJmol.java
+++ b/src/jalview/ext/jmol/PDBFileWithJmol.java
@@ -28,10 +28,13 @@ import jalview.datamodel.SequenceI;
 import jalview.io.AlignFile;
 import jalview.io.FileParse;
 import jalview.schemes.ResidueProperties;
+import jalview.util.Comparison;
 import jalview.util.MessageManager;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Hashtable;
+import java.util.List;
 import java.util.Map;
 
 import javajs.awt.Dimension;
@@ -39,11 +42,13 @@ import javajs.awt.Dimension;
 import org.jmol.api.JmolStatusListener;
 import org.jmol.api.JmolViewer;
 import org.jmol.c.CBK;
+import org.jmol.c.STR;
 import org.jmol.modelset.Group;
 import org.jmol.modelset.Model;
 import org.jmol.modelset.ModelSet;
 import org.jmol.modelsetbio.BioModel;
 import org.jmol.modelsetbio.BioPolymer;
+import org.jmol.modelsetbio.Monomer;
 import org.jmol.viewer.Viewer;
 
 /**
@@ -84,6 +89,8 @@ public class PDBFileWithJmol extends AlignFile implements
       {
         viewer = (Viewer) JmolViewer.allocateViewer(null, null, null, null,
                 null, "-x -o -n", this);
+        // ensure the 'new' (DSSP) not 'old' (Ramachandran) SS method is used
+        viewer.setBooleanProperty("defaultStructureDSSP", true);
       } catch (ClassCastException x)
       {
         throw new Error(MessageManager.formatMessage(
@@ -108,229 +115,95 @@ public class PDBFileWithJmol extends AlignFile implements
     }
   }
 
-  /*
-   * (non-Javadoc)
+  /**
+   * Convert Jmol's secondary structure code to Jalview's, and stored it in the
+   * secondary structure arrays at the given sequence position
    * 
-   * @see jalview.io.AlignFile#parse()
+   * @param proteinStructureSubType
+   * @param pos
+   * @param secstr
+   * @param secstrcode
    */
-  @Override
-  public void parse() throws IOException
+  protected void setSecondaryStructure(STR proteinStructureSubType,
+          int pos, char[] secstr, char[] secstrcode)
   {
-    Viewer jmd = getJmolData();
-    jmd.openReader(getDataName(), getDataName(), getReader());
-    waitForScript(jmd);
-
-    if (jmd.ms.mc > 0)
+    switch (proteinStructureSubType)
     {
-      ModelSet ms = jmd.ms;
-      // Jmol 14.2 added third argument doReport = false
-      ms.calculateStructures(null, true, false, false, true);
-      // System.out.println("Structs\n"+structs);
-      Group group = null;
-      int modelIndex = -1;
-      for (Model model : ms.am)
+    case HELIX310:
+      if (secstr[pos] == 0)
       {
-        modelIndex++;
-        for (BioPolymer bp : ((BioModel) model).bioPolymers)
-        {
-          int lastChainId = 0; // int value of character e.g. 65 for A
-          String lastChainIdAlpha = "";
-
-          int[] groups = bp.getLeadAtomIndices();
-          char seq[] = new char[groups.length], secstr[] = new char[groups.length], secstrcode[] = new char[groups.length];
-          int groupc = 0, len = 0, firstrnum = 1, lastrnum = 0;
-
-          do
-          {
-            if (groupc >= groups.length
-                    || ms.at[groups[groupc]].group.chain.chainID != lastChainId)
-            {
-              /*
-               * on change of chain (or at end), construct the sequence and
-               * secondary structure annotation for the last chain
-               */
-              if (len > 0)
-              {
-                boolean isNa = bp.isNucleic();
-                // normalise sequence from Jmol to jalview
-                int[] cinds = isNa ? ResidueProperties.nucleotideIndex
-                        : ResidueProperties.aaIndex;
-                int nonGap = isNa ? ResidueProperties.maxNucleotideIndex
-                        : ResidueProperties.maxProteinIndex;
-                char ngc = 'X';
-                char newseq[] = new char[len];
-                Annotation asecstr[] = new Annotation[len + firstrnum - 1];
-                for (int p = 0; p < len; p++)
-                {
-                  newseq[p] = cinds[seq[p]] == nonGap ? ngc : seq[p];
-                  if (secstr[p] >= 'A' && secstr[p] <= 'z')
-                  {
-                    try
-                    {
-                      asecstr[p] = new Annotation("" + secstr[p], null,
-                              secstrcode[p], Float.NaN);
-                    } catch (ArrayIndexOutOfBoundsException e)
-                    {
-                      // skip - patch for JAL-1836
-                    }
-                  }
-                }
-                String modelTitle = (String) ms
-                        .getInfo(modelIndex, "title");
-                SequenceI sq = new Sequence("" + getDataName() + "|"
-                        + modelTitle + "|" + lastChainIdAlpha, newseq,
-                        firstrnum, lastrnum);
-                PDBEntry pdbe = new PDBEntry();
-                pdbe.setFile(getDataName());
-                pdbe.setId(getDataName());
-                pdbe.setProperty(new Hashtable());
-                // pdbe.getProperty().put("CHAIN", "" + _lastChainId);
-                pdbe.setChainCode(lastChainIdAlpha);
-                sq.addPDBId(pdbe);
-                // JAL-1533
-                // Need to put the number of models for this polymer somewhere
-                // for Chimera/others to grab
-                // pdbe.getProperty().put("PDBMODELS", biopoly.)
-                seqs.add(sq);
-                if (!isNa)
-                {
-                  String mt = modelTitle == null ? getDataName()
-                          : modelTitle;
-                  if (lastChainId >= ' ')
-                  {
-                    mt += lastChainIdAlpha;
-                  }
-                  AlignmentAnnotation ann = new AlignmentAnnotation(
-                          "Secondary Structure", "Secondary Structure for "
-                                  + mt, asecstr);
-                  ann.belowAlignment = true;
-                  ann.visible = true;
-                  ann.autoCalculated = false;
-                  ann.setCalcId(getClass().getName());
-                  sq.addAlignmentAnnotation(ann);
-                  ann.adjustForAlignment();
-                  ann.validateRangeAndDisplay();
-                  annotations.add(ann);
-                }
-              }
-              len = 0;
-              firstrnum = 1;
-              lastrnum = 0;
-            }
-            if (groupc < groups.length)
-            {
-              group = ms.at[groups[groupc]].group;
-              if (len == 0)
-              {
-                firstrnum = group.getResno();
-                lastChainId = group.chain.chainID;
-                lastChainIdAlpha = group.chain.getIDStr();
-              }
-              else
-              {
-                lastrnum = group.getResno();
-              }
-              seq[len] = group.getGroup1();
-
-              /*
-               * JAL-1828 replace a modified amino acid with its standard
-               * equivalent (e.g. MSE with MET->M) to maximise sequence matching
-               */
-              String threeLetterCode = group.getGroup3();
-              String canonical = ResidueProperties
-                      .getCanonicalAminoAcid(threeLetterCode);
-              if (canonical != null
-                      && !canonical.equalsIgnoreCase(threeLetterCode))
-              {
-                seq[len] = ResidueProperties
-                        .getSingleCharacterCode(canonical);
-              }
-              switch (group.getProteinStructureSubType())
-              {
-              case HELIX310:
-                if (secstr[len] == 0)
-                {
-                  secstr[len] = '3';
-                }
-              case HELIXALPHA:
-                if (secstr[len] == 0)
-                {
-                  secstr[len] = 'H';
-                }
-              case HELIXPI:
-                if (secstr[len] == 0)
-                {
-                  secstr[len] = 'P';
-                }
-              case HELIX:
-                if (secstr[len] == 0)
-                {
-                  secstr[len] = 'H';
-                }
-                secstrcode[len] = 'H';
-                break;
-              case SHEET:
-                secstr[len] = 'E';
-                secstrcode[len] = 'E';
-                break;
-              default:
-                secstr[len] = 0;
-                secstrcode[len] = 0;
-              }
-              len++;
-            }
-          } while (groupc++ < groups.length);
-        }
+        secstr[pos] = '3';
       }
-
-      /*
-       * lastScriptTermination = -9465; String dsspOut =
-       * jmd.evalString("calculate STRUCTURE"); if (dsspOut.equals("pending")) {
-       * while (lastScriptTermination == -9465) { try { Thread.sleep(50); }
-       * catch (Exception x) { } ; } } System.out.println(lastConsoleEcho);
-       */
+    case HELIXALPHA:
+      if (secstr[pos] == 0)
+      {
+        secstr[pos] = 'H';
+      }
+    case HELIXPI:
+      if (secstr[pos] == 0)
+      {
+        secstr[pos] = 'P';
+      }
+    case HELIX:
+      if (secstr[pos] == 0)
+      {
+        secstr[pos] = 'H';
+      }
+      secstrcode[pos] = 'H';
+      break;
+    case SHEET:
+      secstr[pos] = 'E';
+      secstrcode[pos] = 'E';
+      break;
+    default:
+      secstr[pos] = 0;
+      secstrcode[pos] = 0;
     }
   }
 
-  /*
-   * (non-Javadoc)
+  /**
+   * Convert any non-standard peptide codes to their standard code table
+   * equivalent. (Initial version only does Selenomethionine MSE->MET.)
    * 
-   * @see jalview.io.AlignFile#print()
+   * @param threeLetterCode
+   * @param seq
+   * @param pos
+   */
+  protected void replaceNonCanonicalResidue(String threeLetterCode,
+          char[] seq, int pos)
+  {
+    String canonical = ResidueProperties
+            .getCanonicalAminoAcid(threeLetterCode);
+    if (canonical != null && !canonical.equalsIgnoreCase(threeLetterCode))
+    {
+      seq[pos] = ResidueProperties.getSingleCharacterCode(canonical);
+    }
+  }
+
+  /**
+   * Not implemented - returns null
    */
   @Override
   public String print()
   {
-    // TODO Auto-generated method stub
     return null;
   }
 
+  /**
+   * Not implemented
+   */
   @Override
   public void setCallbackFunction(String callbackType,
           String callbackFunction)
   {
-    // TODO Auto-generated method stub
-
   }
 
-  /*
-   * @Override public void notifyCallback(EnumCallback type, Object[] data) {
-   * try { switch (type) { case ERROR: case SCRIPT:
-   * notifyScriptTermination((String) data[2], ((Integer) data[3]).intValue());
-   * break; case MESSAGE: sendConsoleMessage((data == null) ? ((String) null) :
-   * (String) data[1]); break; case LOADSTRUCT: notifyFileLoaded((String)
-   * data[1], (String) data[2], (String) data[3], (String) data[4], ((Integer)
-   * data[5]).intValue());
-   * 
-   * break; default: // System.err.println("Unhandled callback " + type + " " //
-   * + data[1].toString()); break; } } catch (Exception e) {
-   * System.err.println("Squashed Jmol callback handler error:");
-   * e.printStackTrace(); } }
-   */
-  public void notifyCallback(CBK type, Object[] data)
+  @Override
+  public void notifyCallback(CBK cbType, Object[] data)
   {
     String strInfo = (data == null || data[1] == null ? null : data[1]
             .toString());
-    switch (type)
+    switch (cbType)
     {
     case ECHO:
       sendConsoleEcho(strInfo);
@@ -407,49 +280,63 @@ public class PDBFileWithJmol extends AlignFile implements
     }
   }
 
+  /**
+   * Not implemented - returns null
+   */
   @Override
   public String eval(String strEval)
   {
-    // TODO Auto-generated method stub
     return null;
   }
 
+  /**
+   * Not implemented - returns null
+   */
   @Override
   public float[][] functionXY(String functionName, int x, int y)
   {
-    // TODO Auto-generated method stub
     return null;
   }
 
+  /**
+   * Not implemented - returns null
+   */
   @Override
   public float[][][] functionXYZ(String functionName, int nx, int ny, int nz)
   {
-    // TODO Auto-generated method stub
     return null;
   }
 
+  /**
+   * Not implemented - returns null
+   */
   @Override
-  public String createImage(String fileName, String type,
+  public String createImage(String fileName, String imageType,
           Object text_or_bytes, int quality)
   {
-    // TODO Auto-generated method stub
     return null;
   }
 
+  /**
+   * Not implemented - returns null
+   */
   @Override
   public Map<String, Object> getRegistryInfo()
   {
-    // TODO Auto-generated method stub
     return null;
   }
 
+  /**
+   * Not implemented
+   */
   @Override
   public void showUrl(String url)
   {
-    // TODO Auto-generated method stub
-
   }
 
+  /**
+   * Not implemented - returns null
+   */
   @Override
   public Dimension resizeInnerPanel(String data)
   {
@@ -462,4 +349,323 @@ public class PDBFileWithJmol extends AlignFile implements
     return null;
   }
 
+  /**
+   * Calls the Jmol library to parse the PDB file, and then inspects the
+   * resulting object model to generate Jalview-style sequences, with secondary
+   * structure annotation added where available (i.e. where it has been computed
+   * by Jmol using DSSP).
+   * 
+   * @see jalview.io.AlignFile#parse()
+   */
+  @Override
+  public void parse() throws IOException
+  {
+    Viewer jmolModel = getJmolData();
+    jmolModel.openReader(getDataName(), getDataName(), getReader());
+    waitForScript(jmolModel);
+
+    /*
+     * Convert one or more Jmol Model objects to Jalview objects.
+     */
+    if (jmolModel.ms.mc > 0)
+    {
+      parseBiopolymers(jmolModel.ms);
+    }
+  }
+
+  /**
+   * Process the Jmol BioPolymer array and generate a Jalview sequence for each
+   * chain found (including any secondary structure annotation from DSSP)
+   * 
+   * @param ms
+   * @throws IOException
+   */
+  public void parseBiopolymers(ModelSet ms) throws IOException
+  {
+    int modelIndex = -1;
+    for (Model model : ms.am)
+    {
+      modelIndex++;
+      String modelTitle = (String) ms.getInfo(modelIndex, "title");
+
+      /*
+       * as chains can span BioPolymers, we first make a flattened list, 
+       * and then work out the lengths of chains present
+       */
+      List<Monomer> monomers = getMonomers(ms, (BioModel) model);
+      List<Integer> chainLengths = getChainLengths(monomers);
+
+      /*
+       * now chop up the Monomer list to make Jalview Sequences
+       */
+      int from = 0;
+      for (int length : chainLengths)
+      {
+        buildSequenceFromChain(monomers.subList(from, from + length), modelTitle);
+        from += length;
+      }
+    }
+  }
+
+  /**
+   * Helper method to construct a sequence for one chain and add it to the seqs
+   * list
+   * 
+   * @param monomers
+   *          a list of all monomers in the chain
+   * @param modelTitle
+   */
+  protected void buildSequenceFromChain(List<Monomer> monomers, String modelTitle)
+  {
+    final int length = monomers.size();
+
+    /*
+     * arrays to hold sequence and secondary structure
+     */
+    char[] seq = new char[length];
+    char[] secstr = new char[length];
+    char[] secstrcode = new char[length];
+
+    /*
+     * populate the sequence and secondary structure arrays
+     */
+    extractJmolChainData(monomers, seq, secstr, secstrcode);
+
+    /*
+     * grab chain code and start position from first residue;
+     */
+    String chainId = monomers.get(0).chain.getIDStr();
+    int firstResNum = monomers.get(0).getResno();
+    if (firstResNum < 1)
+    {
+      // Jalview doesn't like residue < 1, so force this to 1
+      System.err.println("Converting chain " + chainId + " first RESNUM ("
+              + firstResNum + ") to 1");
+      firstResNum = 1;
+    }
+
+    /*
+     * convert any non-gap unknown residues to 'X'
+     */
+    convertNonGapCharacters(seq);
+
+    /*
+     * construct and add the Jalview sequence
+     */
+    SequenceI sq = new Sequence("" + getDataName() + "|" + modelTitle + "|"
+            + chainId, seq, firstResNum, firstResNum + length - 1);
+    seqs.add(sq);
+
+    /*
+     * add secondary structure predictions (if any)
+     */
+    addSecondaryStructureAnnotation(modelTitle, sq, secstr, secstrcode,
+            chainId, firstResNum);
+
+    /*
+     * record the PDB id for the sequence
+     */
+    addPdbid(sq, chainId);
+  }
+
+  /**
+   * Scans the list of (Jmol) Monomer objects, and adds the residue for each to
+   * the sequence array, and any converted secondary structure prediction to the
+   * secondary structure arrays
+   * 
+   * @param monomers
+   * @param seq
+   * @param secstr
+   * @param secstrcode
+   */
+  protected void extractJmolChainData(List<Monomer> monomers, char[] seq,
+          char[] secstr, char[] secstrcode)
+  {
+    int pos = 0;
+    for (Monomer monomer : monomers)
+    {
+      seq[pos] = monomer.getGroup1();
+
+      /*
+       * JAL-1828 replace a modified amino acid with its standard
+       * equivalent (e.g. MSE with MET->M) to maximise sequence matching
+       */
+      replaceNonCanonicalResidue(monomer.getGroup3(), seq, pos);
+
+      /*
+       * if Jmol has derived a secondary structure prediction for
+       * this position, convert it to Jalview equivalent and save it
+       */
+      setSecondaryStructure(monomer.getProteinStructureSubType(), pos,
+              secstr, secstrcode);
+      pos++;
+    }
+  }
+
+  /**
+   * Helper method that adds an AlignmentAnnotation for secondary structure to
+   * the sequence, provided at least one secondary structure prediction has been
+   * made
+   * 
+   * @param modelTitle
+   * @param seq
+   * @param secstr
+   * @param secstrcode
+   * @param chainId
+   * @param firstResNum
+   * @return
+   */
+  protected void addSecondaryStructureAnnotation(String modelTitle,
+          SequenceI sq, char[] secstr, char[] secstrcode,
+          String chainId, int firstResNum)
+  {
+    char[] seq = sq.getSequence();
+    boolean ssFound = false;
+    Annotation asecstr[] = new Annotation[seq.length + firstResNum - 1];
+    for (int p = 0; p < seq.length; p++)
+    {
+      if (secstr[p] >= 'A' && secstr[p] <= 'z')
+      {
+        asecstr[p] = new Annotation(String.valueOf(secstr[p]), null,
+                secstrcode[p], Float.NaN);
+        ssFound = true;
+      }
+    }
+
+    if (ssFound)
+    {
+      String mt = modelTitle == null ? getDataName() : modelTitle;
+      mt += chainId;
+      AlignmentAnnotation ann = new AlignmentAnnotation(
+              "Secondary Structure", "Secondary Structure for " + mt,
+              asecstr);
+      ann.belowAlignment = true;
+      ann.visible = true;
+      ann.autoCalculated = false;
+      ann.setCalcId(getClass().getName());
+      ann.adjustForAlignment();
+      ann.validateRangeAndDisplay();
+      annotations.add(ann);
+      sq.addAlignmentAnnotation(ann);
+    }
+  }
+
+  /**
+   * Replace any non-gap miscellaneous characters with 'X'
+   * 
+   * @param seq
+   * @return
+   */
+  protected void convertNonGapCharacters(char[] seq)
+  {
+    boolean isNa = Comparison.areNucleotide(new char[][] { seq });
+    int[] cinds = isNa ? ResidueProperties.nucleotideIndex
+            : ResidueProperties.aaIndex;
+    int nonGap = isNa ? ResidueProperties.maxNucleotideIndex
+            : ResidueProperties.maxProteinIndex;
+
+    for (int p = 0; p < seq.length; p++)
+    {
+      if (cinds[seq[p]] == nonGap)
+      {
+        seq[p] = 'X';
+      }
+    }
+  }
+
+  /**
+   * @param sq
+   * @param chainId
+   */
+  protected void addPdbid(SequenceI sq, String chainId)
+  {
+    PDBEntry pdbe = new PDBEntry();
+    pdbe.setFile(getDataName());
+    pdbe.setId(getDataName());
+    pdbe.setProperty(new Hashtable());
+    // pdbe.getProperty().put("CHAIN", "" + _lastChainId);
+    pdbe.setChainCode(chainId);
+    sq.addPDBId(pdbe);
+  }
+
+  /**
+   * Scans the list of Monomers (residue models), inspecting the chain id for
+   * each, and returns an array whose length is the number of chains, and values
+   * the length of each chain
+   * 
+   * @param monomers
+   * @return
+   */
+  protected List<Integer> getChainLengths(List<Monomer> monomers)
+  {
+    List<Integer> chainLengths = new ArrayList<Integer>();
+    int lastChainId = -1;
+    int length = 0;
+
+    for (Monomer monomer : monomers)
+    {
+      int chainId = monomer.chain.chainID;
+      if (chainId != lastChainId && length > 0)
+      {
+        /*
+         * change of chain - record the length of the last one
+         */
+        chainLengths.add(length);
+        length = 0;
+      }
+      lastChainId = chainId;
+      length++;
+    }
+    if (length > 0)
+    {
+      /*
+       * record the length of the final chain
+       */
+      chainLengths.add(length);
+    }
+
+    return chainLengths;
+  }
+
+  /**
+   * Returns a flattened list of Monomer (residue) in order, across all
+   * BioPolymers in the model. This simplifies assembling chains which span
+   * BioPolymers. The result does not include any alternate residues reported
+   * for the same sequence position (RESNUM value).
+   * 
+   * @param ms
+   * @param model
+   * @return
+   */
+  protected List<Monomer> getMonomers(ModelSet ms, BioModel model)
+  {
+    List<Monomer> result = new ArrayList<Monomer>();
+    String lastSeqCode = "";
+
+    for (BioPolymer bp : model.bioPolymers) {
+      for (int groupLeadAtoms : bp.getLeadAtomIndices())
+      {
+        Group group = ms.at[groupLeadAtoms].group;
+        if (group instanceof Monomer)
+        {
+          /*
+           * ignore alternate residue at same position
+           * example: 1ejg has residues A:LEU, B:ILE, C:ILE at RESNUM=25
+           */
+          String seqcodeString = group.getSeqcodeString();
+          if (!lastSeqCode.equals(seqcodeString))
+          {
+            result.add((Monomer) group);
+          }
+          else
+          {
+            System.out.println("skipping");
+          }
+          lastSeqCode = seqcodeString;
+        }
+      }
+    }
+    return result;
+  }
+
 }
diff --git a/test/jalview/ext/jmol/PDBFileWithJmolTest.java b/test/jalview/ext/jmol/PDBFileWithJmolTest.java
index 4ccb6e3..fd02d00 100644
--- a/test/jalview/ext/jmol/PDBFileWithJmolTest.java
+++ b/test/jalview/ext/jmol/PDBFileWithJmolTest.java
@@ -44,11 +44,40 @@ import MCview.PDBfile;
  */
 public class PDBFileWithJmolTest
 {
+  /*
+   * 1GAQ has been reduced to alpha carbons only
+   * 1QCF is the full PDB file including headers, HETATM etc
+   */
   String[] testFile = new String[] { "./examples/1GAQ.txt",
       "./test/jalview/ext/jmol/1QCF.pdb" }; // ,
 
-  // "./examples/DNMT1_MOUSE.pdb"
-  // };
+  //@formatter:off
+  // a modified and very cut-down extract of 4UJ4
+  String pdbWithChainBreak =
+     "HEADER    TRANSPORT PROTEIN                       08-APR-15   4UJ4\n" +
+     // chain B has missing residues; these should all go in the same sequence:
+     "ATOM   1909  CA  VAL B 358      21.329 -19.739 -67.740  1.00201.05           C\n" +
+     "ATOM   1916  CA  GLY B 359      21.694 -23.563 -67.661  1.00198.09           C\n" +
+     "ATOM   1920  CA  LYS B 367      32.471 -12.135 -77.100  1.00257.97           C\n" +
+     "ATOM   1925  CA  ALA B 368      31.032  -9.324 -74.946  1.00276.01           C\n" +
+     // switch to chain C; should be a separate sequence
+     "ATOM   1930  CA  SER C 369      32.589  -7.517 -71.978  1.00265.44           C\n" +
+     "ATOM   1936  CA  ALA C 370      31.650  -6.849 -68.346  1.00249.48           C\n";
+  //@formatter:on
+
+  //@formatter:off
+  // a very cut-down extract of 1ejg
+  String pdbWithAltLoc =
+     "HEADER    TRANSPORT PROTEIN                       08-APR-15   1EJG\n" +
+     "ATOM    448  CA  ALA A  24       6.619  16.195   1.970  1.00  1.65           C\n" +
+     "ATOM    458  CA ALEU A  25       3.048  14.822   1.781  0.57  1.48           C\n" +
+     // alternative residue 25 entries (with ILE instead of LEU) should be ignored:
+     "ATOM    478  CA BILE A  25       3.048  14.822   1.781  0.21  1.48           C\n" +
+     // including the next altloc causes the unit test to fail but it works with the full file
+     // not sure why!
+     //     "ATOM    479  CA CILE A  25       3.048  14.822   1.781  0.22  1.48           C\n" +
+     "ATOM    512  CA  CYS A  26       4.137  11.461   3.154  1.00  1.52           C\n";
+  //@formatter:on
 
   @BeforeMethod(alwaysRun = true)
   public void setUp()
@@ -133,4 +162,48 @@ public class PDBFileWithJmolTest
             "Secondary structure not associated for sequence "
                     + sq.getName(), sq.getAnnotation()[0].sequenceRef == sq);
   }
+
+  /**
+   * Test parsing a chain with missing residues
+   * 
+   * @throws Exception
+   */
+  @Test(groups = { "Functional" })
+  public void testParse_missingResidues() throws Exception
+  {
+    PDBfile mctest = new PDBfile(false, false, false, pdbWithChainBreak,
+            AppletFormatAdapter.PASTE);
+    PDBFileWithJmol jtest = new PDBFileWithJmol(pdbWithChainBreak,
+            jalview.io.AppletFormatAdapter.PASTE);
+    Vector<SequenceI> seqs = jtest.getSeqs();
+    Vector<SequenceI> mcseqs = mctest.getSeqs();
+
+    assertEquals("Failed to find 2 sequences\n", 2, seqs.size());
+    assertEquals("Failed to find 2 sequences\n", 2, mcseqs.size());
+    assertEquals("VGKA", seqs.get(0).getSequenceAsString());
+    assertEquals("VGKA", mcseqs.get(0).getSequenceAsString());
+    assertEquals("SA", seqs.get(1).getSequenceAsString());
+    assertEquals("SA", mcseqs.get(1).getSequenceAsString());
+  }
+
+  /**
+   * Test parsing a chain with 'altloc' residues
+   * 
+   * @throws Exception
+   */
+  @Test(groups = { "Functional" })
+  public void testParse_alternativeResidues() throws Exception
+  {
+    PDBfile mctest = new PDBfile(false, false, false, pdbWithAltLoc,
+            AppletFormatAdapter.PASTE);
+    PDBFileWithJmol jtest = new PDBFileWithJmol(pdbWithAltLoc,
+            jalview.io.AppletFormatAdapter.PASTE);
+    Vector<SequenceI> seqs = jtest.getSeqs();
+    Vector<SequenceI> mcseqs = mctest.getSeqs();
+  
+    assertEquals("Failed to find 1 sequence\n", 1, seqs.size());
+    assertEquals("Failed to find 1 sequence\n", 1, mcseqs.size());
+    assertEquals("ALC", seqs.get(0).getSequenceAsString());
+    assertEquals("ALC", mcseqs.get(0).getSequenceAsString());
+  }
 }