X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fjmol%2FJmolParser.java;h=180da8f449f8044e4f522fb5f758a5dc1b4fd277;hb=2681593fa6810815547b688b89424c837cac1240;hp=668457b4b4cbe7ac2bc2be0302d42aa13be5c827;hpb=e77a693cf000b4ff8a863411acf8c90c6390a061;p=jalview.git diff --git a/src/jalview/ext/jmol/JmolParser.java b/src/jalview/ext/jmol/JmolParser.java index 668457b..180da8f 100644 --- a/src/jalview/ext/jmol/JmolParser.java +++ b/src/jalview/ext/jmol/JmolParser.java @@ -22,36 +22,27 @@ package jalview.ext.jmol; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.Annotation; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; import jalview.datamodel.PDBEntry; -import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; import jalview.io.FileParse; import jalview.io.StructureFile; import jalview.schemes.ResidueProperties; -import jalview.util.Comparison; +import jalview.structure.StructureImportSettings; +import jalview.util.Format; import jalview.util.MessageManager; import java.io.IOException; import java.util.ArrayList; -import java.util.Hashtable; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Vector; -import javajs.awt.Dimension; - import org.jmol.api.JmolStatusListener; import org.jmol.api.JmolViewer; import org.jmol.c.CBK; import org.jmol.c.STR; -import org.jmol.modelset.Group; -import org.jmol.modelset.Model; import org.jmol.modelset.ModelSet; -import org.jmol.modelsetbio.BioModel; -import org.jmol.modelsetbio.BioPolymer; -import org.jmol.modelsetbio.Monomer; import org.jmol.viewer.Viewer; import MCview.Atom; @@ -68,24 +59,9 @@ public class JmolParser extends StructureFile implements JmolStatusListener { Viewer viewer = null; - public JmolParser(boolean addAlignmentAnnotations, - boolean predictSecondaryStructure, boolean externalSecStr, - String inFile, String type) throws IOException + public JmolParser(String inFile, String type) throws IOException { super(inFile, type); - this.visibleChainAnnotation = addAlignmentAnnotations; - this.predictSecondaryStructure = predictSecondaryStructure; - this.externalSecondaryStructure = externalSecStr; - } - - public JmolParser(boolean addAlignmentAnnotations, - boolean predictSecondaryStructure, boolean externalSecStr, - FileParse fp) throws IOException - { - super(fp); - this.visibleChainAnnotation = addAlignmentAnnotations; - this.predictSecondaryStructure = predictSecondaryStructure; - this.externalSecondaryStructure = externalSecStr; } public JmolParser(FileParse fp) throws IOException @@ -93,11 +69,6 @@ public class JmolParser extends StructureFile implements JmolStatusListener super(fp); } - public JmolParser(String inFile, String type) throws IOException - { - super(inFile, type); - } - public JmolParser() { } @@ -113,7 +84,6 @@ public class JmolParser extends StructureFile implements JmolStatusListener @Override public void parse() throws IOException { - setChains(new Vector()); Viewer jmolModel = getJmolData(); jmolModel.openReader(getDataName(), getDataName(), getReader()); @@ -124,8 +94,19 @@ public class JmolParser extends StructureFile implements JmolStatusListener */ if (jmolModel.ms.mc > 0) { - parseBiopolymer(jmolModel.ms); - // transformJmolModelToJalview(jmolModel.ms); + // ideally we do this + // try + // { + // setStructureFileType(jmolModel.evalString("show _fileType")); + // } catch (Exception q) + // { + // } + // ; + // instead, we distinguish .cif from non-.cif by filename + setStructureFileType(getDataName().toLowerCase().endsWith(".cif") ? PDBEntry.Type.MMCIF + .toString() : "PDB"); + + transformJmolModelToJalview(jmolModel.ms); } } @@ -140,6 +121,10 @@ public class JmolParser extends StructureFile implements JmolStatusListener { try { + /* + * params -o (output to sysout) -n (nodisplay) -x (exit when finished) + * see http://wiki.jmol.org/index.php/Jmol_Application + */ viewer = (Viewer) JmolViewer.allocateViewer(null, null, null, null, null, "-x -o -n", this); // ensure the 'new' (DSSP) not 'old' (Ramachandran) SS method is used @@ -163,7 +148,17 @@ public class JmolParser extends StructureFile implements JmolStatusListener List prot = new ArrayList(); PDBChain tmpchain; String pdbId = (String) ms.getInfo(0, "title"); - setId(pdbId); + + if (pdbId == null) + { + setId(safeName(getDataName())); + setPDBIdAvailable(false); + } + else + { + setId(pdbId); + setPDBIdAvailable(true); + } List significantAtoms = convertSignificantAtoms(ms); for (Atom tmpatom : significantAtoms) { @@ -178,23 +173,20 @@ public class JmolParser extends StructureFile implements JmolStatusListener tmpchain.atoms.addElement(tmpatom); } catch (Exception e) { - tmpchain = new PDBChain(pdbId, tmpatom.chain); + tmpchain = new PDBChain(getId(), tmpatom.chain); getChains().add(tmpchain); tmpchain.atoms.addElement(tmpatom); } lastID = tmpatom.resNumIns.trim(); } + xferSettings(); + makeResidueList(); makeCaBondList(); - if (getId() == null) - { - setId(inFile.getName()); - } for (PDBChain chain : getChains()) { SequenceI chainseq = postProcessChain(chain); - createAnnotation(chainseq, chain, ms.at); if (isRNA(chainseq)) { rna.add(chainseq); @@ -203,6 +195,11 @@ public class JmolParser extends StructureFile implements JmolStatusListener { prot.add(chainseq); } + + if (StructureImportSettings.isProcessSecondaryStructure()) + { + createAnnotation(chainseq, chain, ms.at); + } } } catch (OutOfMemoryError er) { @@ -217,331 +214,124 @@ public class JmolParser extends StructureFile implements JmolStatusListener private List convertSignificantAtoms(ModelSet ms) { List significantAtoms = new ArrayList(); + HashMap chainTerMap = new HashMap(); + org.jmol.modelset.Atom prevAtom = null; for (org.jmol.modelset.Atom atom : ms.at) { if (atom.getAtomName().equalsIgnoreCase("CA") || atom.getAtomName().equalsIgnoreCase("P")) { + if (!atomValidated(atom, prevAtom, chainTerMap)) + { + continue; + } Atom curAtom = new Atom(atom.x, atom.y, atom.z); curAtom.atomIndex = atom.getIndex(); curAtom.chain = atom.getChainIDStr(); - curAtom.insCode = atom.group.getInsertionCode(); + curAtom.insCode = atom.group.getInsertionCode() == '\000' ? ' ' + : atom.group.getInsertionCode(); curAtom.name = atom.getAtomName(); curAtom.number = atom.getAtomNumber(); curAtom.resName = atom.getGroup3(true); curAtom.resNumber = atom.getResno(); - curAtom.ss = getSecondayStructure(atom.group - .getProteinStructureSubType()); curAtom.occupancy = ms.occupancies != null ? ms.occupancies[atom .getIndex()] : Float.valueOf(atom.getOccupancy100()); - curAtom.resNumIns = "" + curAtom.resNumber + curAtom.insCode; - // curAtom.tfactor = atom.group.; + String fmt = new Format("%4i").form(curAtom.resNumber); + curAtom.resNumIns = (fmt + curAtom.insCode); + curAtom.tfactor = atom.getBfactor100() / 100f; curAtom.type = 0; - significantAtoms.add(curAtom); + // significantAtoms.add(curAtom); + // ignore atoms from subsequent models + if (!significantAtoms.contains(curAtom)) + { + significantAtoms.add(curAtom); + } + prevAtom = atom; } } return significantAtoms; } - private void createAnnotation(SequenceI sequence, PDBChain chain, - org.jmol.modelset.Atom[] jmolAtoms) + private boolean atomValidated(org.jmol.modelset.Atom curAtom, + org.jmol.modelset.Atom prevAtom, + HashMap chainTerMap) { - char[] secstr = new char[sequence.getLength()]; - char[] secstrcode = new char[sequence.getLength()]; - for (Residue residue : chain.residues) + // System.out.println("Atom: " + curAtom.getAtomNumber() + // + " Last atom index " + curAtom.group.lastAtomIndex); + if (chainTerMap == null || prevAtom == null) { - + return true; } - addSecondaryStructureAnnotation(chain.pdbid, sequence, secstr, - secstrcode, chain.id, sequence.getStart()); - } - - /** - * Process the Jmol BioPolymer array and generate a Jalview sequence for each - * chain found (including any secondary structure annotation from DSSP) - * - * @param ms - * @throws IOException - */ - public void parseBiopolymer(ModelSet ms) throws IOException - { - int modelIndex = -1; - for (Model model : ms.am) + String curAtomChId = curAtom.getChainIDStr(); + String prevAtomChId = prevAtom.getChainIDStr(); + // new chain encoutered + if (!prevAtomChId.equals(curAtomChId)) { - modelIndex++; - String modelTitle = (String) ms.getInfo(modelIndex, "title"); - /* - * Chains can span BioPolymers, so first make a flattened list, and then - * work out the lengths of chains present - */ - List monomers = getMonomers(ms, (BioModel) model); - List chainLengths = getChainLengths(monomers); - - /* - * now chop up the Monomer list to make Jalview Sequences - */ - int from = 0; - for (int length : chainLengths) + // On chain switch add previous chain termination to xTerMap if not exists + if (!chainTerMap.containsKey(prevAtomChId)) { - buildSequenceFromChain(monomers.subList(from, from + length), - modelTitle); - from += length; + chainTerMap.put(prevAtomChId, prevAtom); } - } - } - - /** - * Returns a flattened list of Monomer (residues) in order, across all - * BioPolymers in the model. This simplifies assembling chains which span - * BioPolymers. The result omits any alternate residues reported for the same - * sequence position (RESNUM value). - * - * @param ms - * @param model - * @return - */ - protected List getMonomers(ModelSet ms, BioModel model) - { - List result = new ArrayList(); - int lastResNo = Integer.MIN_VALUE; - - for (BioPolymer bp : model.bioPolymers) - { - for (int groupLeadAtoms : bp.getLeadAtomIndices()) + // if current atom belongs to an already terminated chain and the resNum + // diff < 5 then mark as valid and update termination Atom + if (chainTerMap.containsKey(curAtomChId)) { - Group group = ms.at[groupLeadAtoms].group; - if (group instanceof Monomer) + if (curAtom.getResno() < chainTerMap.get(curAtomChId).getResno()) { - /* - * ignore alternate residue at same position example: 1ejg has - * residues A:LEU, B:ILE at RESNUM=25 - */ - int resNo = group.getResno(); - if (lastResNo != resNo) - { - result.add((Monomer) group); - } - lastResNo = resNo; + return false; + } + if ((curAtom.getResno() - chainTerMap.get(curAtomChId).getResno()) < 5) + { + chainTerMap.put(curAtomChId, curAtom); + return true; } + return false; } } - return result; - } - - /** - * Scans the list of Monomers (residue models), inspecting the chain id for - * each, and returns an array whose length is the number of chains, and values - * the length of each chain - * - * @param monomers - * @return - */ - protected List getChainLengths(List monomers) - { - List chainLengths = new ArrayList(); - int lastChainId = -1; - int length = 0; - - for (Monomer monomer : monomers) + // atom with previously terminated chain encountered + else if (chainTerMap.containsKey(curAtomChId)) { - int chainId = monomer.chain.chainID; - if (chainId != lastChainId && length > 0) + if (curAtom.getResno() < chainTerMap.get(curAtomChId).getResno()) { - /* - * change of chain - record the length of the last one - */ - chainLengths.add(length); - length = 0; + return false; } - lastChainId = chainId; - length++; - } - if (length > 0) - { - /* - * record the length of the final chain - */ - chainLengths.add(length); - } - - return chainLengths; - } - - /** - * Helper method to construct a sequence for one chain and add it to the seqs - * list - * - * @param monomers - * a list of all monomers in the chain - * @param modelTitle - */ - protected void buildSequenceFromChain(List monomers, - String modelTitle) - { - final int length = monomers.size(); - - /* - * arrays to hold sequence and secondary structure - */ - char[] seq = new char[length]; - char[] secstr = new char[length]; - char[] secstrcode = new char[length]; - - /* - * populate the sequence and secondary structure arrays - */ - extractJmolChainData(monomers, seq, secstr, secstrcode); - - /* - * grab chain code and start position from first residue; - */ - String chainId = monomers.get(0).chain.getIDStr(); - int firstResNum = monomers.get(0).getResno(); - if (firstResNum < 1) - { - // Jalview doesn't like residue < 1, so force this to 1 - System.err.println("Converting chain " + chainId + " first RESNUM (" - + firstResNum + ") to 1"); - firstResNum = 1; - } - - /* - * convert any non-gap unknown residues to 'X' - */ - convertNonGapCharacters(seq); - - /* - * construct and add the Jalview sequence - */ - String seqName = "" + modelTitle + "|" + chainId; - int start = firstResNum; - int end = firstResNum + length - 1; - - SequenceI sq = new Sequence(seqName, seq, start, end); - - addPdbid(sq, modelTitle, chainId); - - addSourceDBref(sq, modelTitle, start, end); - - seqs.add(sq); - - /* - * add secondary structure predictions (if any) - */ - addSecondaryStructureAnnotation(modelTitle, sq, secstr, secstrcode, - chainId, firstResNum); - - } - - /** - * Scans the list of (Jmol) Monomer objects, and adds the residue for each to - * the sequence array, and any converted secondary structure prediction to the - * secondary structure arrays - * - * @param monomers - * @param seq - * @param secstr - * @param secstrcode - */ - protected void extractJmolChainData(List monomers, char[] seq, - char[] secstr, char[] secstrcode) - { - int pos = 0; - for (Monomer monomer : monomers) - { - seq[pos] = monomer.getGroup1(); - - /* - * JAL-1828 replace a modified amino acid with its standard equivalent - * (e.g. MSE with MET->M) to maximise sequence matching - */ - replaceNonCanonicalResidue(monomer.getGroup3(), seq, pos); - - /* - * if Jmol has derived a secondary structure prediction for this position, - * convert it to Jalview equivalent and save it - */ - setSecondaryStructure(monomer.getProteinStructureSubType(), pos, - secstr, secstrcode); - pos++; - } - } - - /** - * Replace any non-gap miscellaneous characters with 'X' - * - * @param seq - * @return - */ - protected void convertNonGapCharacters(char[] seq) - { - boolean isNa = Comparison.areNucleotide(new char[][] { seq }); - int[] cinds = isNa ? ResidueProperties.nucleotideIndex - : ResidueProperties.aaIndex; - int nonGap = isNa ? ResidueProperties.maxNucleotideIndex - : ResidueProperties.maxProteinIndex; - - for (int p = 0; p < seq.length; p++) - { - if (cinds[seq[p]] == nonGap) + if ((curAtom.getResno() - chainTerMap.get(curAtomChId).getResno()) < 5) { - seq[p] = 'X'; + chainTerMap.put(curAtomChId, curAtom); + return true; } + return false; } + // HETATM with resNum jump > 2 + return !(curAtom.isHetero() && ((curAtom.getResno() - prevAtom + .getResno()) > 2)); } - /** - * Add a source db ref entry for the given sequence. - * - * @param sq - * @param accessionId - * @param start - * @param end - */ - protected void addSourceDBref(SequenceI sq, String accessionId, - int start, int end) + private void createAnnotation(SequenceI sequence, PDBChain chain, + org.jmol.modelset.Atom[] jmolAtoms) { - DBRefEntry sourceDBRef = new DBRefEntry(); - sourceDBRef.setAccessionId(accessionId); - sourceDBRef.setSource(DBRefSource.MMCIF); - sourceDBRef.setStartRes(start); - sourceDBRef.setEndRes(end); - sq.setSourceDBRef(sourceDBRef); - sq.addDBRef(sourceDBRef); - } + char[] secstr = new char[sequence.getLength()]; + char[] secstrcode = new char[sequence.getLength()]; - /** - * Add a PDBEntry giving the source of PDB data to the sequence - * - * @param sq - * @param id - * @param chainId - */ - protected void addPdbid(SequenceI sq, String id, String chainId) - { - PDBEntry entry = new PDBEntry(); - entry.setId(id); - entry.setType(PDBEntry.Type.MMCIF); - entry.setProperty(new Hashtable()); - if (chainId != null) - { - // entry.getProperty().put("CHAIN", chains.elementAt(i).id); - entry.setChainCode(String.valueOf(chainId)); - } - if (inFile != null) + // Ensure Residue size equals Seq size + if (chain.residues.size() != sequence.getLength()) { - entry.setFile(inFile.getAbsolutePath()); + return; } - else + int annotIndex = 0; + for (Residue residue : chain.residues) { - // TODO: decide if we should dump the datasource to disk - entry.setFile(getDataName()); + Atom repAtom = residue.getAtoms().get(0); + STR proteinStructureSubType = jmolAtoms[repAtom.atomIndex].group + .getProteinStructureSubType(); + setSecondaryStructure(proteinStructureSubType, annotIndex, secstr, + secstrcode); + ++annotIndex; } - - sq.addPDBId(entry); + addSecondaryStructureAnnotation(chain.pdbid, sequence, secstr, + secstrcode, chain.id, sequence.getStart()); } - /** * Helper method that adds an AlignmentAnnotation for secondary structure to * the sequence, provided at least one secondary structure prediction has been @@ -566,9 +356,15 @@ public class JmolParser extends StructureFile implements JmolStatusListener { if (secstr[p] >= 'A' && secstr[p] <= 'z') { - asecstr[p] = new Annotation(String.valueOf(secstr[p]), null, - secstrcode[p], Float.NaN); - ssFound = true; + try + { + asecstr[p] = new Annotation(String.valueOf(secstr[p]), null, + secstrcode[p], Float.NaN); + ssFound = true; + } catch (Exception e) + { + // e.printStackTrace(); + } } } @@ -651,24 +447,6 @@ public class JmolParser extends StructureFile implements JmolStatusListener } } - private char getSecondayStructure(STR proteinStructureSubType) - { - switch (proteinStructureSubType) - { - case HELIX310: - return '3'; - case HELIX: - case HELIXALPHA: - return 'H'; - case HELIXPI: - return 'P'; - case SHEET: - return 'E'; - default: - return 0; - } - } - /** * Convert any non-standard peptide codes to their standard code table * equivalent. (Initial version only does Selenomethionine MSE->MET.) @@ -846,7 +624,7 @@ public class JmolParser extends StructureFile implements JmolStatusListener * Not implemented - returns null */ @Override - public Dimension resizeInnerPanel(String data) + public int[] resizeInnerPanel(String data) { return null; } @@ -867,4 +645,14 @@ public class JmolParser extends StructureFile implements JmolStatusListener this.predictSecondaryStructure = predictSecondaryStructure; } + public boolean isVisibleChainAnnotation() + { + return visibleChainAnnotation; + } + + public void setVisibleChainAnnotation(boolean visibleChainAnnotation) + { + this.visibleChainAnnotation = visibleChainAnnotation; + } + }