X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fext%2Fjmol%2FJmolParser.java;h=ddb44923a5f4a374ac6e4fc7a9e8ec58c9e3006d;hb=bdeec1956a01efb7fdc6100961c6fb596c93404e;hp=702c0b13c728bb64949c82862e04984e1168c1fb;hpb=6d7ab37f37b09174ec61fee301aed6057ef86605;p=jalview.git diff --git a/src/jalview/ext/jmol/JmolParser.java b/src/jalview/ext/jmol/JmolParser.java index 702c0b1..ddb4492 100644 --- a/src/jalview/ext/jmol/JmolParser.java +++ b/src/jalview/ext/jmol/JmolParser.java @@ -22,20 +22,18 @@ package jalview.ext.jmol; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.Annotation; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; import jalview.datamodel.PDBEntry; -import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; import jalview.io.FileParse; import jalview.io.StructureFile; import jalview.schemes.ResidueProperties; -import jalview.util.Comparison; +import jalview.structure.StructureImportSettings; +import jalview.util.Format; import jalview.util.MessageManager; import java.io.IOException; import java.util.ArrayList; -import java.util.Hashtable; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Vector; @@ -46,16 +44,12 @@ import org.jmol.api.JmolStatusListener; import org.jmol.api.JmolViewer; import org.jmol.c.CBK; import org.jmol.c.STR; -import org.jmol.modelset.Group; -import org.jmol.modelset.Model; import org.jmol.modelset.ModelSet; -import org.jmol.modelsetbio.BioModel; -import org.jmol.modelsetbio.BioPolymer; -import org.jmol.modelsetbio.Monomer; import org.jmol.viewer.Viewer; import MCview.Atom; import MCview.PDBChain; +import MCview.Residue; /** * Import and process files with Jmol for file like PDB, mmCIF @@ -67,38 +61,58 @@ public class JmolParser extends StructureFile implements JmolStatusListener { Viewer viewer = null; - public JmolParser(boolean addAlignmentAnnotations, - boolean predictSecondaryStructure, boolean externalSecStr, - String inFile, String type) throws IOException + public JmolParser(boolean addAlignmentAnnotations, boolean predictSecStr, + boolean externalSecStr, String inFile, String type) + throws IOException { super(inFile, type); - this.visibleChainAnnotation = addAlignmentAnnotations; - this.predictSecondaryStructure = predictSecondaryStructure; - this.externalSecondaryStructure = externalSecStr; } - public JmolParser(boolean addAlignmentAnnotations, - boolean predictSecondaryStructure, boolean externalSecStr, - FileParse fp) throws IOException + public JmolParser(boolean addAlignmentAnnotations, boolean predictSecStr, + boolean externalSecStr, FileParse fp) throws IOException { super(fp); - this.visibleChainAnnotation = addAlignmentAnnotations; - this.predictSecondaryStructure = predictSecondaryStructure; - this.externalSecondaryStructure = externalSecStr; } - public JmolParser(FileParse fp) throws IOException + public JmolParser() { - super(fp); } - public JmolParser(String inFile, String type) throws IOException + /** + * Calls the Jmol library to parse the PDB/mmCIF file, and then inspects the + * resulting object model to generate Jalview-style sequences, with secondary + * structure annotation added where available (i.e. where it has been computed + * by Jmol using DSSP). + * + * @see jalview.io.AlignFile#parse() + */ + @Override + public void parse() throws IOException { - super(inFile, type); - } + setChains(new Vector()); + Viewer jmolModel = getJmolData(); + jmolModel.openReader(getDataName(), getDataName(), getReader()); + waitForScript(jmolModel); - public JmolParser() - { + /* + * Convert one or more Jmol Model objects to Jalview sequences + */ + if (jmolModel.ms.mc > 0) + { + // ideally we do this + // try + // { + // setStructureFileType(jmolModel.evalString("show _fileType")); + // } catch (Exception q) + // { + // } + // ; + // instead, we distinguish .cif from non-.cif by filename + setStructureFileType(getDataName().toLowerCase().endsWith(".cif") ? PDBEntry.Type.MMCIF + .toString() : "PDB"); + + transformJmolModelToJalview(jmolModel.ms); + } } /** @@ -126,6 +140,247 @@ public class JmolParser extends StructureFile implements JmolStatusListener return viewer; } + public void transformJmolModelToJalview(ModelSet ms) throws IOException + { + try + { + String lastID = ""; + List rna = new ArrayList(); + List prot = new ArrayList(); + PDBChain tmpchain; + String pdbId = (String) ms.getInfo(0, "title"); + setId(pdbId); + List significantAtoms = convertSignificantAtoms(ms); + for (Atom tmpatom : significantAtoms) + { + try + { + tmpchain = findChain(tmpatom.chain); + if (tmpatom.resNumIns.trim().equals(lastID)) + { + // phosphorylated protein - seen both CA and P.. + continue; + } + tmpchain.atoms.addElement(tmpatom); + } catch (Exception e) + { + tmpchain = new PDBChain(pdbId, tmpatom.chain); + getChains().add(tmpchain); + tmpchain.atoms.addElement(tmpatom); + } + lastID = tmpatom.resNumIns.trim(); + } + xferSettings(); + + makeResidueList(); + makeCaBondList(); + + if (getId() == null) + { + setId(safeName(getDataName())); + } + for (PDBChain chain : getChains()) + { + SequenceI chainseq = postProcessChain(chain); + if (isRNA(chainseq)) + { + rna.add(chainseq); + } + else + { + prot.add(chainseq); + } + + if (StructureImportSettings.isProcessSecondaryStructure()) + { + createAnnotation(chainseq, chain, ms.at); + } + } + } catch (OutOfMemoryError er) + { + System.out + .println("OUT OF MEMORY LOADING TRANSFORMING JMOL MODEL TO JALVIEW MODEL"); + throw new IOException( + MessageManager + .getString("exception.outofmemory_loading_mmcif_file")); + } + } + + private List convertSignificantAtoms(ModelSet ms) + { + List significantAtoms = new ArrayList(); + HashMap chainTerMap = new HashMap(); + org.jmol.modelset.Atom prevAtom = null; + for (org.jmol.modelset.Atom atom : ms.at) + { + if (atom.getAtomName().equalsIgnoreCase("CA") + || atom.getAtomName().equalsIgnoreCase("P")) + { + if (!atomValidated(atom, prevAtom, chainTerMap)) + { + continue; + } + Atom curAtom = new Atom(atom.x, atom.y, atom.z); + curAtom.atomIndex = atom.getIndex(); + curAtom.chain = atom.getChainIDStr(); + curAtom.insCode = atom.group.getInsertionCode() == '\000' ? ' ' + : atom.group.getInsertionCode(); + curAtom.name = atom.getAtomName(); + curAtom.number = atom.getAtomNumber(); + curAtom.resName = atom.getGroup3(true); + curAtom.resNumber = atom.getResno(); + curAtom.occupancy = ms.occupancies != null ? ms.occupancies[atom + .getIndex()] : Float.valueOf(atom.getOccupancy100()); + String fmt = new Format("%4i").form(curAtom.resNumber); + curAtom.resNumIns = (fmt + curAtom.insCode); + curAtom.tfactor = atom.getBfactor100() / 100f; + curAtom.type = 0; + // significantAtoms.add(curAtom); + // ignore atoms from subsequent models + if (!significantAtoms.contains(curAtom)) + { + significantAtoms.add(curAtom); + } + prevAtom = atom; + } + } + return significantAtoms; + } + + private boolean atomValidated(org.jmol.modelset.Atom curAtom, + org.jmol.modelset.Atom prevAtom, + HashMap chainTerMap) + { + // System.out.println("Atom: " + curAtom.getAtomNumber() + // + " Last atom index " + curAtom.group.lastAtomIndex); + if (chainTerMap == null || prevAtom == null) + { + return true; + } + String curAtomChId = curAtom.getChainIDStr(); + String prevAtomChId = prevAtom.getChainIDStr(); + // new chain encoutered + if (!prevAtomChId.equals(curAtomChId)) + { + // On chain switch add previous chain termination to xTerMap if not exists + if (!chainTerMap.containsKey(prevAtomChId)) + { + chainTerMap.put(prevAtomChId, prevAtom); + } + // if current atom belongs to an already terminated chain and the resNum + // diff < 5 then mark as valid and update termination Atom + if (chainTerMap.containsKey(curAtomChId)) + { + if (curAtom.getResno() < chainTerMap.get(curAtomChId).getResno()) + { + return false; + } + if ((curAtom.getResno() - chainTerMap.get(curAtomChId).getResno()) < 5) + { + chainTerMap.put(curAtomChId, curAtom); + return true; + } + return false; + } + } + // atom with previously terminated chain encountered + else if (chainTerMap.containsKey(curAtomChId)) + { + if (curAtom.getResno() < chainTerMap.get(curAtomChId).getResno()) + { + return false; + } + if ((curAtom.getResno() - chainTerMap.get(curAtomChId).getResno()) < 5) + { + chainTerMap.put(curAtomChId, curAtom); + return true; + } + return false; + } + // HETATM with resNum jump > 2 + return !(curAtom.isHetero() && ((curAtom.getResno() - prevAtom + .getResno()) > 2)); + } + + private void createAnnotation(SequenceI sequence, PDBChain chain, + org.jmol.modelset.Atom[] jmolAtoms) + { + char[] secstr = new char[sequence.getLength()]; + char[] secstrcode = new char[sequence.getLength()]; + + // Ensure Residue size equals Seq size + if (chain.residues.size() != sequence.getLength()) + { + return; + } + int annotIndex = 0; + for (Residue residue : chain.residues) + { + Atom repAtom = residue.getAtoms().get(0); + STR proteinStructureSubType = jmolAtoms[repAtom.atomIndex].group + .getProteinStructureSubType(); + setSecondaryStructure(proteinStructureSubType, annotIndex, secstr, + secstrcode); + ++annotIndex; + } + addSecondaryStructureAnnotation(chain.pdbid, sequence, secstr, + secstrcode, chain.id, sequence.getStart()); + } + + /** + * Helper method that adds an AlignmentAnnotation for secondary structure to + * the sequence, provided at least one secondary structure prediction has been + * made + * + * @param modelTitle + * @param seq + * @param secstr + * @param secstrcode + * @param chainId + * @param firstResNum + * @return + */ + protected void addSecondaryStructureAnnotation(String modelTitle, + SequenceI sq, char[] secstr, char[] secstrcode, String chainId, + int firstResNum) + { + char[] seq = sq.getSequence(); + boolean ssFound = false; + Annotation asecstr[] = new Annotation[seq.length + firstResNum - 1]; + for (int p = 0; p < seq.length; p++) + { + if (secstr[p] >= 'A' && secstr[p] <= 'z') + { + try + { + asecstr[p] = new Annotation(String.valueOf(secstr[p]), null, + secstrcode[p], Float.NaN); + ssFound = true; + } catch (Exception e) + { + // e.printStackTrace(); + } + } + } + + if (ssFound) + { + String mt = modelTitle == null ? getDataName() : modelTitle; + mt += chainId; + AlignmentAnnotation ann = new AlignmentAnnotation( + "Secondary Structure", "Secondary Structure for " + mt, + asecstr); + ann.belowAlignment = true; + ann.visible = true; + ann.autoCalculated = false; + ann.setCalcId(getClass().getName()); + ann.adjustForAlignment(); + ann.validateRangeAndDisplay(); + annotations.add(ann); + sq.addAlignmentAnnotation(ann); + } + } + private void waitForScript(Viewer jmd) { while (jmd.isScriptExecuting()) @@ -375,463 +630,24 @@ public class JmolParser extends StructureFile implements JmolStatusListener return null; } - /** - * Calls the Jmol library to parse the PDB file, and then inspects the - * resulting object model to generate Jalview-style sequences, with secondary - * structure annotation added where available (i.e. where it has been computed - * by Jmol using DSSP). - * - * @see jalview.io.AlignFile#parse() - */ - @Override - public void parse() throws IOException - { - - setChains(new Vector()); - Viewer jmolModel = getJmolData(); - jmolModel.openReader(getDataName(), getDataName(), getReader()); - waitForScript(jmolModel); - - /* - * Convert one or more Jmol Model objects to Jalview sequences - */ - if (jmolModel.ms.mc > 0) - { - // parseBiopolymer(jmolModel.ms); - transformJmolModelToJalview(jmolModel.ms); - } - } - - /** - * Process the Jmol BioPolymer array and generate a Jalview sequence for each - * chain found (including any secondary structure annotation from DSSP) - * - * @param ms - * @throws IOException - */ - public void parseBiopolymer(ModelSet ms) throws IOException - { - int modelIndex = -1; - for (Model model : ms.am) - { - modelIndex++; - String modelTitle = (String) ms.getInfo(modelIndex, "title"); - /* - * Chains can span BioPolymers, so first make a flattened list, and then - * work out the lengths of chains present - */ - List monomers = getMonomers(ms, (BioModel) model); - List chainLengths = getChainLengths(monomers); - - /* - * now chop up the Monomer list to make Jalview Sequences - */ - int from = 0; - for (int length : chainLengths) - { - buildSequenceFromChain(monomers.subList(from, from + length), - modelTitle); - from += length; - } - } - } - - public void transformJmolModelToJalview(ModelSet ms) - { - try - { - String lastID = ""; - List rna = new ArrayList(); - List prot = new ArrayList(); - PDBChain tmpchain; - String pdbId = (String) ms.getInfo(0, "title"); - setId(pdbId); - List significantAtoms = convertSignificantAtoms(ms); - for (Atom tmpatom : significantAtoms) - { - try - { - tmpchain = findChain(tmpatom.chain); - if (tmpatom.resNumIns.trim().equals(lastID)) - { - // phosphorylated protein - seen both CA and P.. - continue; - } - tmpchain.atoms.addElement(tmpatom); - } catch (Exception e) - { - tmpchain = new PDBChain(pdbId, tmpatom.chain); - getChains().add(tmpchain); - tmpchain.atoms.addElement(tmpatom); - } - lastID = tmpatom.resNumIns.trim(); - } - makeResidueList(); - makeCaBondList(); - - if (getId() == null) - { - setId(inFile.getName()); - } - for (PDBChain chain : getChains()) - { - SequenceI chainseq = postProcessChain(chain); - if (isRNA(chainseq)) - { - rna.add(chainseq); - } - else - { - prot.add(chainseq); - } - } - } catch (OutOfMemoryError er) - { - System.out - .println("OUT OF MEMORY LOADING TRANSFORMING JMOL MODEL TO JALVIEW MODEL"); - // throw new IOException( - // MessageManager - // .getString("exception.outofmemory_loading_pdb_file")); - } - } - - private List convertSignificantAtoms(ModelSet ms) - { - List significantAtoms = new ArrayList(); - for (org.jmol.modelset.Atom atom : ms.at) - { - if (atom.getAtomName().equalsIgnoreCase("CA") - || atom.getAtomName().equalsIgnoreCase("P")) - { - Atom curAtom = new Atom(atom.x, atom.y, atom.z); - curAtom.atomIndex = atom.getIndex(); - curAtom.chain = atom.getChainIDStr(); - curAtom.insCode = atom.group.getInsertionCode(); - curAtom.name = atom.getAtomName(); - curAtom.number = atom.getAtomNumber(); - curAtom.resName = atom.getGroup3(true); - curAtom.resNumber = atom.getResno(); - curAtom.occupancy = ms.occupancies != null ? ms.occupancies[atom - .getIndex()] : Float.valueOf(atom.getOccupancy100()); - curAtom.resNumIns = "" + curAtom.resNumber + curAtom.insCode; - curAtom.tfactor = 0; - curAtom.type = 0; - significantAtoms.add(curAtom); - } - } - return significantAtoms; - } - - /** - * Helper method to construct a sequence for one chain and add it to the seqs - * list - * - * @param monomers - * a list of all monomers in the chain - * @param modelTitle - */ - protected void buildSequenceFromChain(List monomers, - String modelTitle) - { - final int length = monomers.size(); - - /* - * arrays to hold sequence and secondary structure - */ - char[] seq = new char[length]; - char[] secstr = new char[length]; - char[] secstrcode = new char[length]; - - /* - * populate the sequence and secondary structure arrays - */ - extractJmolChainData(monomers, seq, secstr, secstrcode); - - /* - * grab chain code and start position from first residue; - */ - String chainId = monomers.get(0).chain.getIDStr(); - int firstResNum = monomers.get(0).getResno(); - if (firstResNum < 1) - { - // Jalview doesn't like residue < 1, so force this to 1 - System.err.println("Converting chain " + chainId + " first RESNUM (" - + firstResNum + ") to 1"); - firstResNum = 1; - } - - /* - * convert any non-gap unknown residues to 'X' - */ - convertNonGapCharacters(seq); - - /* - * construct and add the Jalview sequence - */ - String seqName = "" + modelTitle + "|" + chainId; - int start = firstResNum; - int end = firstResNum + length - 1; - - SequenceI sq = new Sequence(seqName, seq, start, end); - - addPdbid(sq, modelTitle, chainId); - - addSourceDBref(sq, modelTitle, start, end); - - seqs.add(sq); - - /* - * add secondary structure predictions (if any) - */ - if (isPredictSecondaryStructure()) - { - addSecondaryStructureAnnotation(modelTitle, sq, secstr, secstrcode, - chainId, firstResNum); - } - - } - - /** - * Add a source db ref entry for the given sequence. - * - * @param sq - * @param accessionId - * @param start - * @param end - */ - protected void addSourceDBref(SequenceI sq, String accessionId, - int start, int end) - { - DBRefEntry sourceDBRef = new DBRefEntry(); - sourceDBRef.setAccessionId(accessionId); - sourceDBRef.setSource(DBRefSource.MMCIF); - sourceDBRef.setStartRes(start); - sourceDBRef.setEndRes(end); - sq.setSourceDBRef(sourceDBRef); - sq.addDBRef(sourceDBRef); - } - - /** - * Add a PDBEntry giving the source of PDB data to the sequence - * - * @param sq - * @param id - * @param chainId - */ - protected void addPdbid(SequenceI sq, String id, String chainId) - { - PDBEntry entry = new PDBEntry(); - entry.setId(id); - entry.setType(PDBEntry.Type.MMCIF); - entry.setProperty(new Hashtable()); - if (chainId != null) - { - // entry.getProperty().put("CHAIN", chains.elementAt(i).id); - entry.setChainCode(String.valueOf(chainId)); - } - if (inFile != null) - { - entry.setFile(inFile.getAbsolutePath()); - } - else - { - // TODO: decide if we should dump the datasource to disk - entry.setFile(getDataName()); - } - - sq.addPDBId(entry); - } - - /** - * Scans the list of (Jmol) Monomer objects, and adds the residue for each to - * the sequence array, and any converted secondary structure prediction to the - * secondary structure arrays - * - * @param monomers - * @param seq - * @param secstr - * @param secstrcode - */ - protected void extractJmolChainData(List monomers, char[] seq, - char[] secstr, char[] secstrcode) - { - int pos = 0; - for (Monomer monomer : monomers) - { - seq[pos] = monomer.getGroup1(); - - /* - * JAL-1828 replace a modified amino acid with its standard equivalent - * (e.g. MSE with MET->M) to maximise sequence matching - */ - replaceNonCanonicalResidue(monomer.getGroup3(), seq, pos); - - /* - * if Jmol has derived a secondary structure prediction for this position, - * convert it to Jalview equivalent and save it - */ - setSecondaryStructure(monomer.getProteinStructureSubType(), pos, - secstr, secstrcode); - pos++; - } - } - - /** - * Helper method that adds an AlignmentAnnotation for secondary structure to - * the sequence, provided at least one secondary structure prediction has been - * made - * - * @param modelTitle - * @param seq - * @param secstr - * @param secstrcode - * @param chainId - * @param firstResNum - * @return - */ - protected void addSecondaryStructureAnnotation(String modelTitle, - SequenceI sq, char[] secstr, char[] secstrcode, String chainId, - int firstResNum) - { - char[] seq = sq.getSequence(); - boolean ssFound = false; - Annotation asecstr[] = new Annotation[seq.length + firstResNum - 1]; - for (int p = 0; p < seq.length; p++) - { - if (secstr[p] >= 'A' && secstr[p] <= 'z') - { - asecstr[p] = new Annotation(String.valueOf(secstr[p]), null, - secstrcode[p], Float.NaN); - ssFound = true; - } - } - - if (ssFound) - { - String mt = modelTitle == null ? getDataName() : modelTitle; - mt += chainId; - AlignmentAnnotation ann = new AlignmentAnnotation( - "Secondary Structure", "Secondary Structure for " + mt, - asecstr); - ann.belowAlignment = true; - ann.visible = true; - ann.autoCalculated = false; - ann.setCalcId(getClass().getName()); - ann.adjustForAlignment(); - ann.validateRangeAndDisplay(); - annotations.add(ann); - sq.addAlignmentAnnotation(ann); - } - } - - /** - * Replace any non-gap miscellaneous characters with 'X' - * - * @param seq - * @return - */ - protected void convertNonGapCharacters(char[] seq) - { - boolean isNa = Comparison.areNucleotide(new char[][] { seq }); - int[] cinds = isNa ? ResidueProperties.nucleotideIndex - : ResidueProperties.aaIndex; - int nonGap = isNa ? ResidueProperties.maxNucleotideIndex - : ResidueProperties.maxProteinIndex; - - for (int p = 0; p < seq.length; p++) - { - if (cinds[seq[p]] == nonGap) - { - seq[p] = 'X'; - } - } - } - - /** - * Scans the list of Monomers (residue models), inspecting the chain id for - * each, and returns an array whose length is the number of chains, and values - * the length of each chain - * - * @param monomers - * @return - */ - protected List getChainLengths(List monomers) + public boolean isPredictSecondaryStructure() { - List chainLengths = new ArrayList(); - int lastChainId = -1; - int length = 0; - - for (Monomer monomer : monomers) - { - int chainId = monomer.chain.chainID; - if (chainId != lastChainId && length > 0) - { - /* - * change of chain - record the length of the last one - */ - chainLengths.add(length); - length = 0; - } - lastChainId = chainId; - length++; - } - if (length > 0) - { - /* - * record the length of the final chain - */ - chainLengths.add(length); - } - - return chainLengths; + return predictSecondaryStructure; } - /** - * Returns a flattened list of Monomer (residues) in order, across all - * BioPolymers in the model. This simplifies assembling chains which span - * BioPolymers. The result omits any alternate residues reported for the same - * sequence position (RESNUM value). - * - * @param ms - * @param model - * @return - */ - protected List getMonomers(ModelSet ms, BioModel model) + public void setPredictSecondaryStructure(boolean predictSecondaryStructure) { - List result = new ArrayList(); - int lastResNo = Integer.MIN_VALUE; - - for (BioPolymer bp : model.bioPolymers) - { - for (int groupLeadAtoms : bp.getLeadAtomIndices()) - { - Group group = ms.at[groupLeadAtoms].group; - if (group instanceof Monomer) - { - /* - * ignore alternate residue at same position example: 1ejg has - * residues A:LEU, B:ILE at RESNUM=25 - */ - int resNo = group.getResno(); - if (lastResNo != resNo) - { - result.add((Monomer) group); - } - lastResNo = resNo; - } - } - } - return result; + this.predictSecondaryStructure = predictSecondaryStructure; } - public boolean isPredictSecondaryStructure() + public boolean isVisibleChainAnnotation() { - return predictSecondaryStructure; + return visibleChainAnnotation; } - public void setPredictSecondaryStructure(boolean predictSecondaryStructure) + public void setVisibleChainAnnotation(boolean visibleChainAnnotation) { - this.predictSecondaryStructure = predictSecondaryStructure; + this.visibleChainAnnotation = visibleChainAnnotation; } }