+ /**
+ * Calls the Jmol library to parse the PDB file, and then inspects the
+ * resulting object model to generate Jalview-style sequences, with secondary
+ * structure annotation added where available (i.e. where it has been computed
+ * by Jmol using DSSP).
+ *
+ * @see jalview.io.AlignFile#parse()
+ */
+ @Override
+ public void parse() throws IOException
+ {
+ Viewer jmolModel = getJmolData();
+ jmolModel.openReader(getDataName(), getDataName(), getReader());
+ waitForScript(jmolModel);
+
+ /*
+ * Convert one or more Jmol Model objects to Jalview objects.
+ */
+ if (jmolModel.ms.mc > 0)
+ {
+ parseBiopolymers(jmolModel.ms);
+ }
+ }
+
+ /**
+ * Process the Jmol BioPolymer array and generate a Jalview sequence for each
+ * chain found (including any secondary structure annotation from DSSP)
+ *
+ * @param ms
+ * @throws IOException
+ */
+ public void parseBiopolymers(ModelSet ms) throws IOException
+ {
+ int modelIndex = -1;
+ for (Model model : ms.am)
+ {
+ modelIndex++;
+ String modelTitle = (String) ms.getInfo(modelIndex, "title");
+
+ /*
+ * as chains can span BioPolymers, we first make a flattened list,
+ * and then work out the lengths of chains present
+ */
+ List<Monomer> monomers = getMonomers(ms, (BioModel) model);
+ List<Integer> chainLengths = getChainLengths(monomers);
+
+ /*
+ * now chop up the Monomer list to make Jalview Sequences
+ */
+ int from = 0;
+ for (int length : chainLengths)
+ {
+ buildSequenceFromChain(monomers.subList(from, from + length), modelTitle);
+ from += length;
+ }
+ }
+ }
+
+ /**
+ * Helper method to construct a sequence for one chain and add it to the seqs
+ * list
+ *
+ * @param monomers
+ * a list of all monomers in the chain
+ * @param modelTitle
+ */
+ protected void buildSequenceFromChain(List<Monomer> monomers, String modelTitle)
+ {
+ final int length = monomers.size();
+
+ /*
+ * arrays to hold sequence and secondary structure
+ */
+ char[] seq = new char[length];
+ char[] secstr = new char[length];
+ char[] secstrcode = new char[length];
+
+ /*
+ * populate the sequence and secondary structure arrays
+ */
+ extractJmolChainData(monomers, seq, secstr, secstrcode);
+
+ /*
+ * grab chain code and start position from first residue;
+ */
+ String chainId = monomers.get(0).chain.getIDStr();
+ int firstResNum = monomers.get(0).getResno();
+ if (firstResNum < 1)
+ {
+ // Jalview doesn't like residue < 1, so force this to 1
+ System.err.println("Converting chain " + chainId + " first RESNUM ("
+ + firstResNum + ") to 1");
+ firstResNum = 1;
+ }
+
+ /*
+ * convert any non-gap unknown residues to 'X'
+ */
+ convertNonGapCharacters(seq);
+
+ /*
+ * construct and add the Jalview sequence
+ */
+ SequenceI sq = new Sequence("" + getDataName() + "|" + modelTitle + "|"
+ + chainId, seq, firstResNum, firstResNum + length - 1);
+ seqs.add(sq);
+
+ /*
+ * add secondary structure predictions (if any)
+ */
+ addSecondaryStructureAnnotation(modelTitle, sq, secstr, secstrcode,
+ chainId, firstResNum);
+
+ /*
+ * record the PDB id for the sequence
+ */
+ addPdbid(sq, chainId);
+ }
+
+ /**
+ * Scans the list of (Jmol) Monomer objects, and adds the residue for each to
+ * the sequence array, and any converted secondary structure prediction to the
+ * secondary structure arrays
+ *
+ * @param monomers
+ * @param seq
+ * @param secstr
+ * @param secstrcode
+ */
+ protected void extractJmolChainData(List<Monomer> monomers, char[] seq,
+ char[] secstr, char[] secstrcode)
+ {
+ int pos = 0;
+ for (Monomer monomer : monomers)
+ {
+ seq[pos] = monomer.getGroup1();
+
+ /*
+ * JAL-1828 replace a modified amino acid with its standard
+ * equivalent (e.g. MSE with MET->M) to maximise sequence matching
+ */
+ replaceNonCanonicalResidue(monomer.getGroup3(), seq, pos);
+
+ /*
+ * if Jmol has derived a secondary structure prediction for
+ * this position, convert it to Jalview equivalent and save it
+ */
+ setSecondaryStructure(monomer.getProteinStructureSubType(), pos,
+ secstr, secstrcode);
+ pos++;
+ }
+ }
+
+ /**
+ * Helper method that adds an AlignmentAnnotation for secondary structure to
+ * the sequence, provided at least one secondary structure prediction has been
+ * made
+ *
+ * @param modelTitle
+ * @param seq
+ * @param secstr
+ * @param secstrcode
+ * @param chainId
+ * @param firstResNum
+ * @return
+ */
+ protected void addSecondaryStructureAnnotation(String modelTitle,
+ SequenceI sq, char[] secstr, char[] secstrcode,
+ String chainId, int firstResNum)
+ {
+ char[] seq = sq.getSequence();
+ boolean ssFound = false;
+ Annotation asecstr[] = new Annotation[seq.length + firstResNum - 1];
+ for (int p = 0; p < seq.length; p++)
+ {
+ if (secstr[p] >= 'A' && secstr[p] <= 'z')
+ {
+ asecstr[p] = new Annotation(String.valueOf(secstr[p]), null,
+ secstrcode[p], Float.NaN);
+ ssFound = true;
+ }
+ }
+
+ if (ssFound)
+ {
+ String mt = modelTitle == null ? getDataName() : modelTitle;
+ mt += chainId;
+ AlignmentAnnotation ann = new AlignmentAnnotation(
+ "Secondary Structure", "Secondary Structure for " + mt,
+ asecstr);
+ ann.belowAlignment = true;
+ ann.visible = true;
+ ann.autoCalculated = false;
+ ann.setCalcId(getClass().getName());
+ ann.adjustForAlignment();
+ ann.validateRangeAndDisplay();
+ annotations.add(ann);
+ sq.addAlignmentAnnotation(ann);
+ }
+ }
+
+ /**
+ * Replace any non-gap miscellaneous characters with 'X'
+ *
+ * @param seq
+ * @return
+ */
+ protected void convertNonGapCharacters(char[] seq)
+ {
+ boolean isNa = Comparison.areNucleotide(new char[][] { seq });
+ int[] cinds = isNa ? ResidueProperties.nucleotideIndex
+ : ResidueProperties.aaIndex;
+ int nonGap = isNa ? ResidueProperties.maxNucleotideIndex
+ : ResidueProperties.maxProteinIndex;
+
+ for (int p = 0; p < seq.length; p++)
+ {
+ if (cinds[seq[p]] == nonGap)
+ {
+ seq[p] = 'X';
+ }
+ }
+ }
+
+ /**
+ * @param sq
+ * @param chainId
+ */
+ protected void addPdbid(SequenceI sq, String chainId)
+ {
+ PDBEntry pdbe = new PDBEntry();
+ pdbe.setFile(getDataName());
+ pdbe.setId(getDataName());
+ pdbe.setProperty(new Hashtable());
+ // pdbe.getProperty().put("CHAIN", "" + _lastChainId);
+ pdbe.setChainCode(chainId);
+ sq.addPDBId(pdbe);
+ }
+
+ /**
+ * Scans the list of Monomers (residue models), inspecting the chain id for
+ * each, and returns an array whose length is the number of chains, and values
+ * the length of each chain
+ *
+ * @param monomers
+ * @return
+ */
+ protected List<Integer> getChainLengths(List<Monomer> monomers)
+ {
+ List<Integer> chainLengths = new ArrayList<Integer>();
+ int lastChainId = -1;
+ int length = 0;
+
+ for (Monomer monomer : monomers)
+ {
+ int chainId = monomer.chain.chainID;
+ if (chainId != lastChainId && length > 0)
+ {
+ /*
+ * change of chain - record the length of the last one
+ */
+ chainLengths.add(length);
+ length = 0;
+ }
+ lastChainId = chainId;
+ length++;
+ }
+ if (length > 0)
+ {
+ /*
+ * record the length of the final chain
+ */
+ chainLengths.add(length);
+ }
+
+ return chainLengths;
+ }
+
+ /**
+ * Returns a flattened list of Monomer (residue) in order, across all
+ * BioPolymers in the model. This simplifies assembling chains which span
+ * BioPolymers. The result does not include any alternate residues reported
+ * for the same sequence position (RESNUM value).
+ *
+ * @param ms
+ * @param model
+ * @return
+ */
+ protected List<Monomer> getMonomers(ModelSet ms, BioModel model)
+ {
+ List<Monomer> result = new ArrayList<Monomer>();
+ String lastSeqCode = "";
+
+ for (BioPolymer bp : model.bioPolymers) {
+ for (int groupLeadAtoms : bp.getLeadAtomIndices())
+ {
+ Group group = ms.at[groupLeadAtoms].group;
+ if (group instanceof Monomer)
+ {
+ /*
+ * ignore alternate residue at same position
+ * example: 1ejg has residues A:LEU, B:ILE, C:ILE at RESNUM=25
+ */
+ String seqcodeString = group.getSeqcodeString();
+ if (!lastSeqCode.equals(seqcodeString))
+ {
+ result.add((Monomer) group);
+ }
+ else
+ {
+ System.out.println("skipping");
+ }
+ lastSeqCode = seqcodeString;
+ }
+ }
+ }
+ return result;
+ }
+