2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.jmol;
23 import jalview.datamodel.AlignmentAnnotation;
24 import jalview.datamodel.Annotation;
25 import jalview.datamodel.DBRefEntry;
26 import jalview.datamodel.DBRefSource;
27 import jalview.datamodel.PDBEntry;
28 import jalview.datamodel.Sequence;
29 import jalview.datamodel.SequenceI;
30 import jalview.io.AlignFile;
31 import jalview.io.FileParse;
32 import jalview.schemes.ResidueProperties;
33 import jalview.util.Comparison;
34 import jalview.util.MessageManager;
36 import java.io.IOException;
37 import java.util.ArrayList;
38 import java.util.Collection;
39 import java.util.Hashtable;
40 import java.util.List;
43 import javajs.awt.Dimension;
45 import org.jmol.api.JmolStatusListener;
46 import org.jmol.api.JmolViewer;
47 import org.jmol.c.CBK;
48 import org.jmol.c.STR;
49 import org.jmol.modelset.Group;
50 import org.jmol.modelset.Model;
51 import org.jmol.modelset.ModelSet;
52 import org.jmol.modelsetbio.BioModel;
53 import org.jmol.modelsetbio.BioPolymer;
54 import org.jmol.modelsetbio.Monomer;
55 import org.jmol.viewer.Viewer;
57 import MCview.PDBChain;
60 * Import and process files with Jmol, for file formats such as PDB and mmCIF
// Structure file parser built on AlignFile. The remainder of the class
// header (the implemented interface list) is not visible in this extract.
65 public class JmolParser extends AlignFile implements
// Chains held by this parser: replaced with an empty ArrayList at the start
// of parse() and assignable through setChains().
70 private Collection<PDBChain> chains;
73 * Set true to predict secondary structure (using JMol for protein, Annotate3D
// Defaults to true; buildSequenceFromChain() consults it (through
// isPredictSecondaryStructure()) before adding annotation.
76 private boolean predictSecondaryStructure = true;
// Constructor taking an input file reference and a type string; its body is
// not visible in this extract.
78 public JmolParser(String inFile, String type) throws IOException
// Constructor taking an existing FileParse source; its body is not visible
// in this extract.
83 public JmolParser(FileParse fp) throws IOException
93 * create a headless jmol instance for dataprocessing
// Provides the Jmol Viewer used for data processing. The enclosing
// null-check / try / return lines are not visible in this extract.
97 private Viewer getJmolData()
// All leading arguments are null; "-x -o -n" is the Jmol command option
// string and this parser is passed as the final argument.
// NOTE(review): presumably the final argument registers this object as the
// status listener and the options request a headless viewer - confirm
// against the Jmol API.
103 viewer = (Viewer) JmolViewer.allocateViewer(null, null, null, null,
104 null, "-x -o -n", this);
105 // ensure the 'new' (DSSP) not 'old' (Ramachandran) SS method is used
106 viewer.setBooleanProperty("defaultStructureDSSP", true);
// Reached when the object returned by allocateViewer cannot be cast to
// org.jmol.viewer.Viewer.
107 } catch (ClassCastException x)
// Rethrown as an Error whose message is formatted by MessageManager with the
// reported Jmol version; the original exception is kept as the cause.
109 throw new Error(MessageManager.formatMessage(
110 "error.jmol_version_not_compatible_with_jalview_version",
111 new String[] { JmolViewer.getJmolVersion() }), x);
// Blocks the calling thread until the given viewer reports that no script is
// executing.
117 private void waitForScript(Viewer jmd)
// Polling loop; the pause between polls is on lines not visible in this
// extract.
119 while (jmd.isScriptExecuting())
// NOTE(review): the handler body is not visible here. If it swallows the
// interrupt, consider restoring it with Thread.currentThread().interrupt()
// and leaving the loop so callers can observe the interruption.
125 } catch (InterruptedException x)
132 * Convert Jmol's secondary structure code to Jalview's, and store it in the
133 * secondary structure arrays at the given sequence position
135 * @param proteinStructureSubType
// Translates one Jmol structure subtype into Jalview codes, writing the
// result at index pos of the supplied arrays.
140 protected void setSecondaryStructure(STR proteinStructureSubType,
141 int pos, char[] secstr, char[] secstrcode)
// First switch: its cases are not visible in this extract (presumably they
// fill secstr[pos] - confirm).
143 switch (proteinStructureSubType)
// Second switch: sets secstrcode[pos]; the visible branches assign 'H' or
// 'E'. The case labels and any default branch are not visible here.
162 switch (proteinStructureSubType)
168 secstrcode[pos] = 'H';
171 secstrcode[pos] = 'E';
179 * Convert any non-standard peptide codes to their standard code table
180 * equivalent. (Initial version only does Selenomethionine MSE->MET.)
182 * @param threeLetterCode
186 protected void replaceNonCanonicalResidue(String threeLetterCode,
189 String canonical = ResidueProperties
190 .getCanonicalAminoAcid(threeLetterCode);
191 if (canonical != null && !canonical.equalsIgnoreCase(threeLetterCode))
193 seq[pos] = ResidueProperties.getSingleCharacterCode(canonical);
198 * Not implemented - returns null
// Output is not supported by this parser: the doc comment on this method
// states that it returns null. The body is not visible in this extract.
201 public String print()
// Hook receiving a callback type name and a callback function name; its body
// is not visible in this extract.
210 public void setCallbackFunction(String callbackType,
211 String callbackFunction)
// Receives a Jmol callback and records its text in one of the console/script
// buffers. The switch header and case labels that select each branch below
// are not visible in this extract.
216 public void notifyCallback(CBK cbType, Object[] data)
// strInfo becomes null when data or data[1] is null; otherwise it is derived
// from data[1] (the end of the expression is not visible here).
// NOTE(review): data[1] is indexed without a length check - assumes at least
// two elements are always supplied.
218 String strInfo = (data == null || data[1] == null ? null : data[1]
// Branch: append the text to the console echo buffer.
223 sendConsoleEcho(strInfo);
// Branch: record script termination; data[2] is cast to String and data[3]
// to Integer, so both must be present and of those types.
226 notifyScriptTermination((String) data[2],
227 ((Integer) data[3]).intValue());
// Branch: data[3] is treated as a status string and routed by its content.
230 String mystatus = (String) data[3];
231 if (mystatus.indexOf("Picked") >= 0
232 || mystatus.indexOf("Sequence") >= 0)
235 sendConsoleMessage(strInfo);
237 else if (mystatus.indexOf("Completed") >= 0)
// Echo the portion of strInfo that starts two characters after the last
// comma and stops before the final character.
// NOTE(review): strInfo can be null (see its initialiser), which would throw
// a NullPointerException here.
239 sendConsoleEcho(strInfo.substring(strInfo.lastIndexOf(",") + 2,
240 strInfo.length() - 1));
// NOTE(review): the data == null test is redundant - strInfo is already null
// whenever data is null.
244 sendConsoleMessage(data == null ? null : strInfo);
247 sendConsoleMessage(strInfo);
// Accumulates every string passed to sendConsoleEcho(), each followed by a
// newline; starts empty.
254 String lastConsoleEcho = "";
256 private void sendConsoleEcho(String string)
258 lastConsoleEcho += string;
259 lastConsoleEcho += "\n";
// Accumulates every string passed to sendConsoleMessage(), each followed by
// a newline; starts empty.
262 String lastConsoleMessage = "";
264 private void sendConsoleMessage(String string)
266 lastConsoleMessage += string;
267 lastConsoleMessage += "\n";
// Value passed to the most recent notifyScriptTermination() call; -1 until
// the first call.
270 int lastScriptTermination = -1;
// Accumulates the message of every notifyScriptTermination() call, each
// followed by a newline; starts empty.
272 String lastScriptMessage = "";
274 private void notifyScriptTermination(String string, int intValue)
276 lastScriptMessage += string;
277 lastScriptMessage += "\n";
278 lastScriptTermination = intValue;
// Reports, per callback type, whether this object wants to be notified; the
// individual cases and their results are not visible in this extract.
282 public boolean notifyEnabled(CBK callbackPick)
284 switch (callbackPick)
298 * Not implemented - returns null
// Unimplemented hook: the doc comment on this method states that it returns
// null. The body is not visible in this extract.
301 public String eval(String strEval)
307 * Not implemented - returns null
// Unimplemented hook: the doc comment on this method states that it returns
// null. The body is not visible in this extract.
310 public float[][] functionXY(String functionName, int x, int y)
316 * Not implemented - returns null
// Unimplemented hook: the doc comment on this method states that it returns
// null. The body is not visible in this extract.
319 public float[][][] functionXYZ(String functionName, int nx, int ny, int nz)
325 * Not implemented - returns null
// Unimplemented hook: the doc comment on this method states that it returns
// null. The body is not visible in this extract.
328 public String createImage(String fileName, String imageType,
329 Object text_or_bytes, int quality)
335 * Not implemented - returns null
// Unimplemented hook: the doc comment on this method states that it returns
// null. The body is not visible in this extract.
338 public Map<String, Object> getRegistryInfo()
// Hook receiving a URL string; its body is not visible in this extract.
347 public void showUrl(String url)
352 * Not implemented - returns null
// Unimplemented hook: the doc comment on this method states that it returns
// null. The body is not visible in this extract.
355 public Dimension resizeInnerPanel(String data)
// Hook taking a single string argument and returning a map; its body is not
// visible in this extract.
361 public Map<String, Object> getJSpecViewProperty(String arg0)
367 * Calls the Jmol library to parse the PDB file, and then inspects the
368 * resulting object model to generate Jalview-style sequences, with secondary
369 * structure annotation added where available (i.e. where it has been computed
370 * by Jmol using DSSP).
372 * @see jalview.io.AlignFile#parse()
// Loads this parser's data into a Jmol viewer, waits for Jmol to finish, and
// converts the resulting models into Jalview sequences.
375 public void parse() throws IOException
// Start each parse with an empty chain collection.
378 chains = new ArrayList<PDBChain>();
379 Viewer jmolModel = getJmolData();
// The data name is supplied for both of the first two arguments, together
// with this parser's reader.
// NOTE(review): any value returned by openReader is discarded - confirm
// whether it reports load errors that should be surfaced.
380 jmolModel.openReader(getDataName(), getDataName(), getReader());
// Do not inspect the model set until Jmol reports no script is executing.
381 waitForScript(jmolModel);
384 * Convert one or more Jmol Model objects to Jalview sequences
// ms is the viewer's ModelSet; mc is presumably its model count - confirm
// against the Jmol ModelSet documentation.
386 if (jmolModel.ms.mc > 0)
388 parseBiopolymers(jmolModel.ms);
393 * Process the Jmol BioPolymer array and generate a Jalview sequence for each
394 * chain found (including any secondary structure annotation from DSSP)
397 * @throws IOException
// Walks every model in the model set and builds one Jalview sequence for
// each chain found in it.
399 public void parseBiopolymers(ModelSet ms) throws IOException
402 for (Model model : ms.am)
// modelIndex is declared and advanced on lines not visible in this extract.
405 String modelTitle = (String) ms.getInfo(modelIndex, "title");
408 * Chains can span BioPolymers, so first make a flattened list,
409 * and then work out the lengths of chains present
// NOTE(review): unchecked cast - assumes every Model in ms.am is a BioModel;
// anything else would raise ClassCastException.
411 List<Monomer> monomers = getMonomers(ms, (BioModel) model);
412 List<Integer> chainLengths = getChainLengths(monomers);
415 * now chop up the Monomer list to make Jalview Sequences
418 for (int length : chainLengths)
// Each chain is the run of `length` monomers beginning at offset `from`
// (declared and advanced on lines not visible in this extract).
420 buildSequenceFromChain(monomers.subList(from, from + length), modelTitle);
427 * Helper method to construct a sequence for one chain and add it to the seqs
431 * a list of all monomers in the chain
// Builds a Sequence from the monomers of one chain, attaches its PDB entry,
// source db reference and chain metadata, and (when enabled) secondary
// structure annotation.
434 protected void buildSequenceFromChain(List<Monomer> monomers, String modelTitle)
436 final int length = monomers.size();
439 * arrays to hold sequence and secondary structure
441 char[] seq = new char[length];
442 char[] secstr = new char[length];
443 char[] secstrcode = new char[length];
446 * populate the sequence and secondary structure arrays
448 extractJmolChainData(monomers, seq, secstr, secstrcode);
451 * grab chain code and start position from first residue;
// NOTE(review): monomers.get(0) requires a non-empty list - presumably
// guaranteed by the caller's chain lengths; confirm.
453 String chainId = monomers.get(0).chain.getIDStr();
454 int firstResNum = monomers.get(0).getResno();
// The condition guarding the message below, and the reassignment it
// describes, are on lines not visible in this extract.
457 // Jalview doesn't like residue < 1, so force this to 1
458 System.err.println("Converting chain " + chainId + " first RESNUM ("
459 + firstResNum + ") to 1");
464 * convert any non-gap unknown residues to 'X'
466 convertNonGapCharacters(seq);
469 * construct and add the Jalview sequence
// The name begins with the model title and a '|' separator; the rest of the
// expression is not visible in this extract.
471 String seqName = "" + modelTitle + "|"
// Residues are numbered contiguously from firstResNum for the whole chain.
473 int start = firstResNum;
474 int end = firstResNum + length - 1;
476 SequenceI sq = new Sequence(seqName, seq, start, end);
478 addPdbid(sq, modelTitle, chainId);
480 addSourceDBref(sq, modelTitle, start, end);
484 addChainMetaData(sq, monomers, chainId);
487 * add secondary structure predictions (if any)
489 if (isPredictSecondaryStructure())
491 addSecondaryStructureAnnotation(modelTitle, sq, secstr, secstrcode,
492 chainId, firstResNum);
// Iterates over the residue characters of the sequence; the rest of the
// signature and the loop body are not visible in this extract, so what is
// recorded per residue cannot be stated from here.
497 public void addChainMetaData(SequenceI sq, List<Monomer> monomers,
500 for (char res : sq.getSequence())
507 * Add a source db ref entry for the given sequence.
514 protected void addSourceDBref(SequenceI sq, String accessionId,
517 DBRefEntry sourceDBRef = new DBRefEntry();
518 sourceDBRef.setAccessionId(accessionId);
519 sourceDBRef.setSource(DBRefSource.MMCIF);
520 sourceDBRef.setStartRes(start);
521 sourceDBRef.setEndRes(end);
522 sq.setSourceDBRef(sourceDBRef);
523 sq.addDBRef(sourceDBRef);
528 * Add a PDBEntry giving the source of PDB data to the sequence
// Builds a PDBEntry describing where the structure data came from. The
// statement using the id argument, the branch conditions, and the final step
// attaching the entry to the sequence are not visible in this extract.
534 protected void addPdbid(SequenceI sq, String id, String chainId)
536 PDBEntry entry = new PDBEntry();
538 entry.setType(PDBEntry.Type.MMCIF);
// NOTE(review): raw Hashtable - parameterise once the key/value types
// expected by PDBEntry.setProperty are confirmed.
539 entry.setProperty(new Hashtable());
542 // entry.getProperty().put("CHAIN", chains.elementAt(i).id);
543 entry.setChainCode(String.valueOf(chainId));
// File location: the absolute path of inFile in one branch, the data name in
// the other.
547 entry.setFile(inFile.getAbsolutePath());
551 // TODO: decide if we should dump the datasource to disk
552 entry.setFile(getDataName());
559 * Scans the list of (Jmol) Monomer objects, and adds the residue for each to
560 * the sequence array, and any converted secondary structure prediction to the
561 * secondary structure arrays
// Fills seq, secstr and secstrcode from the monomers of one chain. pos, the
// array index for the current monomer, is declared and advanced on lines not
// visible in this extract.
568 protected void extractJmolChainData(List<Monomer> monomers, char[] seq,
569 char[] secstr, char[] secstrcode)
572 for (Monomer monomer : monomers)
// Store the character returned by getGroup1() (presumably the one-letter
// residue code - confirm against Jmol).
574 seq[pos] = monomer.getGroup1();
577 * JAL-1828 replace a modified amino acid with its standard
578 * equivalent (e.g. MSE with MET->M) to maximise sequence matching
580 replaceNonCanonicalResidue(monomer.getGroup3(), seq, pos);
583 * if Jmol has derived a secondary structure prediction for
584 * this position, convert it to Jalview equivalent and save it
586 setSecondaryStructure(monomer.getProteinStructureSubType(), pos,
593 * Helper method that adds an AlignmentAnnotation for secondary structure to
594 * the sequence, provided at least one secondary structure prediction has been
// Wraps the per-residue secondary structure codes in an AlignmentAnnotation
// and registers it with both this parser's annotation list and the sequence.
605 protected void addSecondaryStructureAnnotation(String modelTitle,
606 SequenceI sq, char[] secstr, char[] secstrcode,
607 String chainId, int firstResNum)
609 char[] seq = sq.getSequence();
// ssFound is updated and tested on lines not visible in this extract.
610 boolean ssFound = false;
// NOTE(review): the array holds seq.length + firstResNum - 1 slots, but the
// loop below only fills indices 0..seq.length-1 - confirm the extra trailing
// slots are intended.
611 Annotation asecstr[] = new Annotation[seq.length + firstResNum - 1];
612 for (int p = 0; p < seq.length; p++)
// Positions whose code lies in 'A'..'z' get an annotation; note that this
// range also admits the punctuation characters between 'Z' and 'a'.
614 if (secstr[p] >= 'A' && secstr[p] <= 'z')
616 asecstr[p] = new Annotation(String.valueOf(secstr[p]), null,
617 secstrcode[p], Float.NaN);
// Fall back to the data name when no model title was supplied.
624 String mt = modelTitle == null ? getDataName() : modelTitle;
626 AlignmentAnnotation ann = new AlignmentAnnotation(
627 "Secondary Structure", "Secondary Structure for " + mt,
629 ann.belowAlignment = true;
631 ann.autoCalculated = false;
// The calc id is this class's fully qualified name.
632 ann.setCalcId(getClass().getName());
633 ann.adjustForAlignment();
634 ann.validateRangeAndDisplay();
635 annotations.add(ann);
636 sq.addAlignmentAnnotation(ann);
641 * Replace any non-gap miscellaneous characters with 'X'
646 protected void convertNonGapCharacters(char[] seq)
648 boolean isNa = Comparison.areNucleotide(new char[][] { seq });
649 int[] cinds = isNa ? ResidueProperties.nucleotideIndex
650 : ResidueProperties.aaIndex;
651 int nonGap = isNa ? ResidueProperties.maxNucleotideIndex
652 : ResidueProperties.maxProteinIndex;
654 for (int p = 0; p < seq.length; p++)
656 if (cinds[seq[p]] == nonGap)
665 * Scans the list of Monomers (residue models), inspecting the chain id for
666 * each, and returns a list whose size is the number of chains, and whose
667 * values are the lengths of each chain
// Counts consecutive monomers that share a chain id and collects one count
// per chain. The counter `length` is declared, incremented and reset on
// lines not visible in this extract.
672 protected List<Integer> getChainLengths(List<Monomer> monomers)
674 List<Integer> chainLengths = new ArrayList<Integer>();
// -1 is the initial value, before any monomer has been examined.
675 int lastChainId = -1;
678 for (Monomer monomer : monomers)
680 int chainId = monomer.chain.chainID;
// The length > 0 test prevents a count being recorded before any monomer of
// a chain has been counted.
681 if (chainId != lastChainId && length > 0)
684 * change of chain - record the length of the last one
686 chainLengths.add(length);
689 lastChainId = chainId;
695 * record the length of the final chain
// Any condition guarding this final add is not visible in this extract.
697 chainLengths.add(length);
704 * Returns a flattened list of Monomer (residues) in order, across all
705 * BioPolymers in the model. This simplifies assembling chains which span
706 * BioPolymers. The result omits any alternate residues reported for the same
707 * sequence position (RESNUM value).
// Collects the Monomer groups of every BioPolymer in the model, visiting
// polymers in array order and, within each, its lead atom indices in order.
713 protected List<Monomer> getMonomers(ModelSet ms, BioModel model)
715 List<Monomer> result = new ArrayList<Monomer>();
// Declared outside both loops, so the value carries over from one BioPolymer
// to the next; where it is updated is not visible in this extract.
716 int lastResNo = Integer.MIN_VALUE;
718 for (BioPolymer bp : model.bioPolymers)
720 for (int groupLeadAtoms : bp.getLeadAtomIndices())
// Look up the group that owns the atom at this index.
722 Group group = ms.at[groupLeadAtoms].group;
723 if (group instanceof Monomer)
726 * ignore alternate residue at same position
727 * example: 1ejg has residues A:LEU, B:ILE at RESNUM=25
729 int resNo = group.getResno();
// NOTE(review): because lastResNo is never reset per polymer, a monomer
// whose residue number equals the one remembered from the previous polymer
// would presumably be skipped as well - confirm this is acceptable.
730 if (lastResNo != resNo)
732 result.add((Monomer) group);
741 public boolean isPredictSecondaryStructure()
743 return predictSecondaryStructure;
746 public void setPredictSecondaryStructure(boolean predictSecondaryStructure)
748 this.predictSecondaryStructure = predictSecondaryStructure;
// Accessor for the chain collection (presumably returns the chains field;
// the body is not visible in this extract).
751 public Collection<PDBChain> getChains()
756 public void setChains(Collection<PDBChain> chains)
758 this.chains = chains;