2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ext.jmol;
23 import jalview.datamodel.AlignmentAnnotation;
24 import jalview.datamodel.Annotation;
25 import jalview.datamodel.DBRefEntry;
26 import jalview.datamodel.DBRefSource;
27 import jalview.datamodel.PDBEntry;
28 import jalview.datamodel.Sequence;
29 import jalview.datamodel.SequenceI;
30 import jalview.io.FileParse;
31 import jalview.io.StructureFile;
32 import jalview.schemes.ResidueProperties;
33 import jalview.util.Comparison;
34 import jalview.util.MessageManager;
36 import java.io.IOException;
37 import java.util.ArrayList;
38 import java.util.Hashtable;
39 import java.util.List;
41 import java.util.Vector;
43 import javajs.awt.Dimension;
45 import org.jmol.api.JmolStatusListener;
46 import org.jmol.api.JmolViewer;
47 import org.jmol.c.CBK;
48 import org.jmol.c.STR;
49 import org.jmol.modelset.Group;
50 import org.jmol.modelset.Model;
51 import org.jmol.modelset.ModelSet;
52 import org.jmol.modelsetbio.BioModel;
53 import org.jmol.modelsetbio.BioPolymer;
54 import org.jmol.modelsetbio.Monomer;
55 import org.jmol.viewer.Viewer;
58 import MCview.PDBChain;
61 * Import and process files with Jmol for file formats such as PDB and mmCIF
66 public class JmolParser extends StructureFile implements JmolStatusListener
70 public JmolParser(boolean addAlignmentAnnotations,
71 boolean predictSecondaryStructure, boolean externalSecStr,
72 String inFile, String type) throws IOException
75 this.visibleChainAnnotation = addAlignmentAnnotations;
76 this.predictSecondaryStructure = predictSecondaryStructure;
77 this.externalSecondaryStructure = externalSecStr;
80 public JmolParser(boolean addAlignmentAnnotations,
81 boolean predictSecondaryStructure, boolean externalSecStr,
82 FileParse fp) throws IOException
85 this.visibleChainAnnotation = addAlignmentAnnotations;
86 this.predictSecondaryStructure = predictSecondaryStructure;
87 this.externalSecondaryStructure = externalSecStr;
90 public JmolParser(FileParse fp) throws IOException
95 public JmolParser(String inFile, String type) throws IOException
105 * Create a headless Jmol instance for data processing
109 private Viewer getJmolData()
115 viewer = (Viewer) JmolViewer.allocateViewer(null, null, null, null,
116 null, "-x -o -n", this);
117 // ensure the 'new' (DSSP) not 'old' (Ramachandran) SS method is used
118 viewer.setBooleanProperty("defaultStructureDSSP", true);
119 } catch (ClassCastException x)
121 throw new Error(MessageManager.formatMessage(
122 "error.jmol_version_not_compatible_with_jalview_version",
123 new String[] { JmolViewer.getJmolVersion() }), x);
129 private void waitForScript(Viewer jmd)
131 while (jmd.isScriptExecuting())
137 } catch (InterruptedException x)
144 * Convert Jmol's secondary structure code to Jalview's, and store it in the
145 * secondary structure arrays at the given sequence position
147 * @param proteinStructureSubType
152 protected void setSecondaryStructure(STR proteinStructureSubType,
153 int pos, char[] secstr, char[] secstrcode)
155 switch (proteinStructureSubType)
174 switch (proteinStructureSubType)
180 secstrcode[pos] = 'H';
183 secstrcode[pos] = 'E';
191 * Convert any non-standard peptide codes to their standard code table
192 * equivalent. (Initial version only does Selenomethionine MSE->MET.)
194 * @param threeLetterCode
198 protected void replaceNonCanonicalResidue(String threeLetterCode,
201 String canonical = ResidueProperties
202 .getCanonicalAminoAcid(threeLetterCode);
203 if (canonical != null && !canonical.equalsIgnoreCase(threeLetterCode))
205 seq[pos] = ResidueProperties.getSingleCharacterCode(canonical);
210 * Not implemented - returns null
213 public String print()
222 public void setCallbackFunction(String callbackType,
223 String callbackFunction)
228 public void notifyCallback(CBK cbType, Object[] data)
230 String strInfo = (data == null || data[1] == null ? null : data[1]
235 sendConsoleEcho(strInfo);
238 notifyScriptTermination((String) data[2],
239 ((Integer) data[3]).intValue());
242 String mystatus = (String) data[3];
243 if (mystatus.indexOf("Picked") >= 0
244 || mystatus.indexOf("Sequence") >= 0)
247 sendConsoleMessage(strInfo);
249 else if (mystatus.indexOf("Completed") >= 0)
251 sendConsoleEcho(strInfo.substring(strInfo.lastIndexOf(",") + 2,
252 strInfo.length() - 1));
256 sendConsoleMessage(data == null ? null : strInfo);
259 sendConsoleMessage(strInfo);
266 String lastConsoleEcho = "";
268 private void sendConsoleEcho(String string)
270 lastConsoleEcho += string;
271 lastConsoleEcho += "\n";
274 String lastConsoleMessage = "";
276 private void sendConsoleMessage(String string)
278 lastConsoleMessage += string;
279 lastConsoleMessage += "\n";
282 int lastScriptTermination = -1;
284 String lastScriptMessage = "";
286 private void notifyScriptTermination(String string, int intValue)
288 lastScriptMessage += string;
289 lastScriptMessage += "\n";
290 lastScriptTermination = intValue;
294 public boolean notifyEnabled(CBK callbackPick)
296 switch (callbackPick)
310 * Not implemented - returns null
313 public String eval(String strEval)
319 * Not implemented - returns null
322 public float[][] functionXY(String functionName, int x, int y)
328 * Not implemented - returns null
331 public float[][][] functionXYZ(String functionName, int nx, int ny, int nz)
337 * Not implemented - returns null
340 public String createImage(String fileName, String imageType,
341 Object text_or_bytes, int quality)
347 * Not implemented - returns null
350 public Map<String, Object> getRegistryInfo()
359 public void showUrl(String url)
364 * Not implemented - returns null
367 public Dimension resizeInnerPanel(String data)
373 public Map<String, Object> getJSpecViewProperty(String arg0)
379 * Calls the Jmol library to parse the PDB file, and then inspects the
380 * resulting object model to generate Jalview-style sequences, with secondary
381 * structure annotation added where available (i.e. where it has been computed
382 * by Jmol using DSSP).
384 * @see jalview.io.AlignFile#parse()
387 public void parse() throws IOException
390 setChains(new Vector<PDBChain>());
391 Viewer jmolModel = getJmolData();
392 jmolModel.openReader(getDataName(), getDataName(), getReader());
393 waitForScript(jmolModel);
396 * Convert one or more Jmol Model objects to Jalview sequences
398 if (jmolModel.ms.mc > 0)
400 // parseBiopolymer(jmolModel.ms);
401 transformJmolModelToJalview(jmolModel.ms);
406 * Process the Jmol BioPolymer array and generate a Jalview sequence for each
407 * chain found (including any secondary structure annotation from DSSP)
410 * @throws IOException
412 public void parseBiopolymer(ModelSet ms) throws IOException
415 for (Model model : ms.am)
418 String modelTitle = (String) ms.getInfo(modelIndex, "title");
420 * Chains can span BioPolymers, so first make a flattened list, and then
421 * work out the lengths of chains present
423 List<Monomer> monomers = getMonomers(ms, (BioModel) model);
424 List<Integer> chainLengths = getChainLengths(monomers);
427 * now chop up the Monomer list to make Jalview Sequences
430 for (int length : chainLengths)
432 buildSequenceFromChain(monomers.subList(from, from + length),
439 public void transformJmolModelToJalview(ModelSet ms)
444 List<SequenceI> rna = new ArrayList<SequenceI>();
445 List<SequenceI> prot = new ArrayList<SequenceI>();
447 String pdbId = (String) ms.getInfo(0, "title");
449 List<Atom> significantAtoms = convertSignificantAtoms(ms);
450 for (Atom tmpatom : significantAtoms)
454 tmpchain = findChain(tmpatom.chain);
455 if (tmpatom.resNumIns.trim().equals(lastID))
457 // phosphorylated protein - seen both CA and P..
460 tmpchain.atoms.addElement(tmpatom);
461 } catch (Exception e)
463 tmpchain = new PDBChain(pdbId, tmpatom.chain);
464 getChains().add(tmpchain);
465 tmpchain.atoms.addElement(tmpatom);
467 lastID = tmpatom.resNumIns.trim();
474 setId(inFile.getName());
476 for (PDBChain chain : getChains())
478 SequenceI chainseq = postProcessChain(chain);
488 } catch (OutOfMemoryError er)
491 .println("OUT OF MEMORY LOADING TRANSFORMING JMOL MODEL TO JALVIEW MODEL");
492 // throw new IOException(
494 // .getString("exception.outofmemory_loading_pdb_file"));
498 private List<Atom> convertSignificantAtoms(ModelSet ms)
500 List<Atom> significantAtoms = new ArrayList<Atom>();
501 for (org.jmol.modelset.Atom atom : ms.at)
503 if (atom.getAtomName().equalsIgnoreCase("CA")
504 || atom.getAtomName().equalsIgnoreCase("P"))
506 Atom curAtom = new Atom(atom.x, atom.y, atom.z);
507 curAtom.atomIndex = atom.getIndex();
508 curAtom.chain = atom.getChainIDStr();
509 curAtom.insCode = atom.group.getInsertionCode();
510 curAtom.name = atom.getAtomName();
511 curAtom.number = atom.getAtomNumber();
512 curAtom.resName = atom.getGroup3(true);
513 curAtom.resNumber = atom.getResno();
514 curAtom.occupancy = ms.occupancies != null ? ms.occupancies[atom
515 .getIndex()] : Float.valueOf(atom.getOccupancy100());
516 curAtom.resNumIns = "" + curAtom.resNumber + curAtom.insCode;
519 significantAtoms.add(curAtom);
522 return significantAtoms;
526 * Helper method to construct a sequence for one chain and add it to the seqs
530 * a list of all monomers in the chain
533 protected void buildSequenceFromChain(List<Monomer> monomers,
536 final int length = monomers.size();
539 * arrays to hold sequence and secondary structure
541 char[] seq = new char[length];
542 char[] secstr = new char[length];
543 char[] secstrcode = new char[length];
546 * populate the sequence and secondary structure arrays
548 extractJmolChainData(monomers, seq, secstr, secstrcode);
551 * grab chain code and start position from first residue;
553 String chainId = monomers.get(0).chain.getIDStr();
554 int firstResNum = monomers.get(0).getResno();
557 // Jalview doesn't like residue < 1, so force this to 1
558 System.err.println("Converting chain " + chainId + " first RESNUM ("
559 + firstResNum + ") to 1");
564 * convert any non-gap unknown residues to 'X'
566 convertNonGapCharacters(seq);
569 * construct and add the Jalview sequence
571 String seqName = "" + modelTitle + "|" + chainId;
572 int start = firstResNum;
573 int end = firstResNum + length - 1;
575 SequenceI sq = new Sequence(seqName, seq, start, end);
577 addPdbid(sq, modelTitle, chainId);
579 addSourceDBref(sq, modelTitle, start, end);
584 * add secondary structure predictions (if any)
586 if (isPredictSecondaryStructure())
588 addSecondaryStructureAnnotation(modelTitle, sq, secstr, secstrcode,
589 chainId, firstResNum);
595 * Add a source db ref entry for the given sequence.
602 protected void addSourceDBref(SequenceI sq, String accessionId,
605 DBRefEntry sourceDBRef = new DBRefEntry();
606 sourceDBRef.setAccessionId(accessionId);
607 sourceDBRef.setSource(DBRefSource.MMCIF);
608 sourceDBRef.setStartRes(start);
609 sourceDBRef.setEndRes(end);
610 sq.setSourceDBRef(sourceDBRef);
611 sq.addDBRef(sourceDBRef);
615 * Add a PDBEntry giving the source of PDB data to the sequence
621 protected void addPdbid(SequenceI sq, String id, String chainId)
623 PDBEntry entry = new PDBEntry();
625 entry.setType(PDBEntry.Type.MMCIF);
626 entry.setProperty(new Hashtable());
629 // entry.getProperty().put("CHAIN", chains.elementAt(i).id);
630 entry.setChainCode(String.valueOf(chainId));
634 entry.setFile(inFile.getAbsolutePath());
638 // TODO: decide if we should dump the datasource to disk
639 entry.setFile(getDataName());
646 * Scans the list of (Jmol) Monomer objects, and adds the residue for each to
647 * the sequence array, and any converted secondary structure prediction to the
648 * secondary structure arrays
655 protected void extractJmolChainData(List<Monomer> monomers, char[] seq,
656 char[] secstr, char[] secstrcode)
659 for (Monomer monomer : monomers)
661 seq[pos] = monomer.getGroup1();
664 * JAL-1828 replace a modified amino acid with its standard equivalent
665 * (e.g. MSE with MET->M) to maximise sequence matching
667 replaceNonCanonicalResidue(monomer.getGroup3(), seq, pos);
670 * if Jmol has derived a secondary structure prediction for this position,
671 * convert it to Jalview equivalent and save it
673 setSecondaryStructure(monomer.getProteinStructureSubType(), pos,
680 * Helper method that adds an AlignmentAnnotation for secondary structure to
681 * the sequence, provided at least one secondary structure prediction has been
692 protected void addSecondaryStructureAnnotation(String modelTitle,
693 SequenceI sq, char[] secstr, char[] secstrcode, String chainId,
696 char[] seq = sq.getSequence();
697 boolean ssFound = false;
698 Annotation asecstr[] = new Annotation[seq.length + firstResNum - 1];
699 for (int p = 0; p < seq.length; p++)
701 if (secstr[p] >= 'A' && secstr[p] <= 'z')
703 asecstr[p] = new Annotation(String.valueOf(secstr[p]), null,
704 secstrcode[p], Float.NaN);
711 String mt = modelTitle == null ? getDataName() : modelTitle;
713 AlignmentAnnotation ann = new AlignmentAnnotation(
714 "Secondary Structure", "Secondary Structure for " + mt,
716 ann.belowAlignment = true;
718 ann.autoCalculated = false;
719 ann.setCalcId(getClass().getName());
720 ann.adjustForAlignment();
721 ann.validateRangeAndDisplay();
722 annotations.add(ann);
723 sq.addAlignmentAnnotation(ann);
728 * Replace any non-gap miscellaneous characters with 'X'
733 protected void convertNonGapCharacters(char[] seq)
735 boolean isNa = Comparison.areNucleotide(new char[][] { seq });
736 int[] cinds = isNa ? ResidueProperties.nucleotideIndex
737 : ResidueProperties.aaIndex;
738 int nonGap = isNa ? ResidueProperties.maxNucleotideIndex
739 : ResidueProperties.maxProteinIndex;
741 for (int p = 0; p < seq.length; p++)
743 if (cinds[seq[p]] == nonGap)
751 * Scans the list of Monomers (residue models), inspecting the chain id for
752 * each, and returns a list with one entry per chain, each entry being the
753 * length of that chain
758 protected List<Integer> getChainLengths(List<Monomer> monomers)
760 List<Integer> chainLengths = new ArrayList<Integer>();
761 int lastChainId = -1;
764 for (Monomer monomer : monomers)
766 int chainId = monomer.chain.chainID;
767 if (chainId != lastChainId && length > 0)
770 * change of chain - record the length of the last one
772 chainLengths.add(length);
775 lastChainId = chainId;
781 * record the length of the final chain
783 chainLengths.add(length);
790 * Returns a flattened list of Monomer (residues) in order, across all
791 * BioPolymers in the model. This simplifies assembling chains which span
792 * BioPolymers. The result omits any alternate residues reported for the same
793 * sequence position (RESNUM value).
799 protected List<Monomer> getMonomers(ModelSet ms, BioModel model)
801 List<Monomer> result = new ArrayList<Monomer>();
802 int lastResNo = Integer.MIN_VALUE;
804 for (BioPolymer bp : model.bioPolymers)
806 for (int groupLeadAtoms : bp.getLeadAtomIndices())
808 Group group = ms.at[groupLeadAtoms].group;
809 if (group instanceof Monomer)
812 * ignore alternate residue at same position example: 1ejg has
813 * residues A:LEU, B:ILE at RESNUM=25
815 int resNo = group.getResno();
816 if (lastResNo != resNo)
818 result.add((Monomer) group);
827 public boolean isPredictSecondaryStructure()
829 return predictSecondaryStructure;
832 public void setPredictSecondaryStructure(boolean predictSecondaryStructure)
834 this.predictSecondaryStructure = predictSecondaryStructure;