From: tcofoegbu Date: Thu, 3 Mar 2016 09:23:40 +0000 (+0000) Subject: JAL-1919 PDBfile and JmolParser refactor X-Git-Tag: Release_2_10_0~293^2~9 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=e77a693cf000b4ff8a863411acf8c90c6390a061;p=jalview.git JAL-1919 PDBfile and JmolParser refactor --- diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index e428989..15724d3 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -1284,4 +1284,5 @@ label.sifts_mapping = SIFTs Mapping label.mapping_method = Sequence \u27f7 Structure mapping method status.waiting_for_user_to_select_output_file = Waiting for user to select {0} file. status.cancelled_image_export_operation = Cancelled {0} export operation. -info.error_creating_file = Error creating {0} file. \ No newline at end of file +info.error_creating_file = Error creating {0} file. +exception.outofmemory_loading_mmcif_file = Out of memory loading mmCIF File \ No newline at end of file diff --git a/src/MCview/Atom.java b/src/MCview/Atom.java index 68a7c21..1e7f973 100755 --- a/src/MCview/Atom.java +++ b/src/MCview/Atom.java @@ -46,6 +46,8 @@ public class Atom public int type; + public char ss; + Color color = Color.lightGray; public String chain; diff --git a/src/MCview/PDBfile.java b/src/MCview/PDBfile.java index 0934fdb..ed694f2 100755 --- a/src/MCview/PDBfile.java +++ b/src/MCview/PDBfile.java @@ -20,17 +20,13 @@ */ package MCview; -import jalview.analysis.AlignSeq; -import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; -import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceI; import jalview.io.FileParse; import jalview.io.StructureFile; import jalview.util.MessageManager; import java.io.IOException; -import java.lang.reflect.Constructor; import java.util.ArrayList; import java.util.Hashtable; import java.util.List; @@ -191,7 +187,7 @@ public class PDBfile extends StructureFile } if (predictSecondaryStructure) { - predictSecondaryStructure(rna, prot); + addSecondaryStructure(rna, prot); } } catch (OutOfMemoryError er) { @@ -210,49 +206,7 @@ public class PDBfile extends StructureFile markCalcIds(); } - /** - * Predict secondary structure for RNA and/or protein sequences and add as - * annotations - * - * @param rnaSequences - * @param proteinSequences - */ - protected void predictSecondaryStructure(List rnaSequences, - List proteinSequences) - { - /* - * Currently using Annotate3D for RNA, but only if the 'use external - * prediction' flag is set - */ - if (externalSecondaryStructure && rnaSequences.size() > 0) - { - try - { - processPdbFileWithAnnotate3d(rnaSequences); - } catch (Exception x) - { - System.err.println("Exceptions when dealing with RNA in pdb file"); - x.printStackTrace(); - - } - } - /* - * Currently using JMol PDB parser for peptide - */ - if (proteinSequences.size() > 0) - { - try - { - processPdbFileWithJmol(proteinSequences); - } catch (Exception x) - { - System.err - .println("Exceptions from Jmol when processing data in pdb file"); - x.printStackTrace(); - } - } - } /** * Process a parsed chain to construct and return a Sequence, and add it to @@ -306,39 +260,4 @@ public class PDBfile extends StructureFile } } - private void processPdbFileWithJmol(List prot) - throws Exception - { - try - { - - Class cl = Class.forName("jalview.ext.jmol.JmolParser"); - if (cl != null) - { - final Constructor constructor = cl - .getConstructor(new Class[] { FileParse.class }); - final Object[] args = new Object[] { new FileParse(getDataName(), - type) }; - Object jmf = constructor.newInstance(args); - AlignmentI al = new Alignment((SequenceI[]) cl.getMethod( - "getSeqsAsArray", new Class[] {}).invoke(jmf)); - cl.getMethod("addAnnotations", new Class[] { AlignmentI.class }) - .invoke(jmf, al); - for (SequenceI sq : al.getSequences()) - { - if (sq.getDatasetSequence() != null) - { - sq.getDatasetSequence().getAllPDBEntries().clear(); - } - else - { - sq.getAllPDBEntries().clear(); - } - } - replaceAndUpdateChains(prot, al, AlignSeq.PEP, false); - } - } catch (ClassNotFoundException q) - { - } - } } diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index ac2f9c1..d73b283 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -420,7 +420,7 @@ public class Sequence extends ASequence implements SequenceI @Override public Vector getAllPDBEntries() { - return pdbIds; + return pdbIds == null ? new Vector() : pdbIds; } /** diff --git a/src/jalview/ext/jmol/JmolParser.java b/src/jalview/ext/jmol/JmolParser.java index 702c0b1..668457b 100644 --- a/src/jalview/ext/jmol/JmolParser.java +++ b/src/jalview/ext/jmol/JmolParser.java @@ -56,6 +56,7 @@ import org.jmol.viewer.Viewer; import MCview.Atom; import MCview.PDBChain; +import MCview.Residue; /** * Import and process files with Jmol for file like PDB, mmCIF @@ -102,6 +103,33 @@ public class JmolParser extends StructureFile implements JmolStatusListener } /** + * Calls the Jmol library to parse the PDB/mmCIF file, and then inspects the + * resulting object model to generate Jalview-style sequences, with secondary + * structure annotation added where available (i.e. where it has been computed + * by Jmol using DSSP). + * + * @see jalview.io.AlignFile#parse() + */ + @Override + public void parse() throws IOException + { + + setChains(new Vector()); + Viewer jmolModel = getJmolData(); + jmolModel.openReader(getDataName(), getDataName(), getReader()); + waitForScript(jmolModel); + + /* + * Convert one or more Jmol Model objects to Jalview sequences + */ + if (jmolModel.ms.mc > 0) + { + parseBiopolymer(jmolModel.ms); + // transformJmolModelToJalview(jmolModel.ms); + } + } + + /** * create a headless jmol instance for dataprocessing * * @return @@ -126,702 +154,707 @@ public class JmolParser extends StructureFile implements JmolStatusListener return viewer; } - private void waitForScript(Viewer jmd) + public void transformJmolModelToJalview(ModelSet ms) throws IOException { - while (jmd.isScriptExecuting()) + try { - try + String lastID = ""; + List rna = new ArrayList(); + List prot = new ArrayList(); + PDBChain tmpchain; + String pdbId = (String) ms.getInfo(0, "title"); + setId(pdbId); + List significantAtoms = convertSignificantAtoms(ms); + for (Atom tmpatom : significantAtoms) { - Thread.sleep(50); + try + { + tmpchain = findChain(tmpatom.chain); + if (tmpatom.resNumIns.trim().equals(lastID)) + { + // phosphorylated protein - seen both CA and P.. + continue; + } + tmpchain.atoms.addElement(tmpatom); + } catch (Exception e) + { + tmpchain = new PDBChain(pdbId, tmpatom.chain); + getChains().add(tmpchain); + tmpchain.atoms.addElement(tmpatom); + } + lastID = tmpatom.resNumIns.trim(); + } + makeResidueList(); + makeCaBondList(); - } catch (InterruptedException x) + if (getId() == null) + { + setId(inFile.getName()); + } + for (PDBChain chain : getChains()) { + SequenceI chainseq = postProcessChain(chain); + createAnnotation(chainseq, chain, ms.at); + if (isRNA(chainseq)) + { + rna.add(chainseq); + } + else + { + prot.add(chainseq); + } } + } catch (OutOfMemoryError er) + { + System.out + .println("OUT OF MEMORY LOADING TRANSFORMING JMOL MODEL TO JALVIEW MODEL"); + throw new IOException( + MessageManager + .getString("exception.outofmemory_loading_mmcif_file")); } } - /** - * Convert Jmol's secondary structure code to Jalview's, and stored it in the - * secondary structure arrays at the given sequence position - * - * @param proteinStructureSubType - * @param pos - * @param secstr - * @param secstrcode - */ - protected void setSecondaryStructure(STR proteinStructureSubType, - int pos, char[] secstr, char[] secstrcode) + private List convertSignificantAtoms(ModelSet ms) { - switch (proteinStructureSubType) + List significantAtoms = new ArrayList(); + for (org.jmol.modelset.Atom atom : ms.at) { - case HELIX310: - secstr[pos] = '3'; - break; - case HELIX: - case HELIXALPHA: - secstr[pos] = 'H'; - break; - case HELIXPI: - secstr[pos] = 'P'; - break; - case SHEET: - secstr[pos] = 'E'; - break; - default: - secstr[pos] = 0; + if (atom.getAtomName().equalsIgnoreCase("CA") + || atom.getAtomName().equalsIgnoreCase("P")) + { + Atom curAtom = new Atom(atom.x, atom.y, atom.z); + curAtom.atomIndex = atom.getIndex(); + curAtom.chain = atom.getChainIDStr(); + curAtom.insCode = atom.group.getInsertionCode(); + curAtom.name = atom.getAtomName(); + curAtom.number = atom.getAtomNumber(); + curAtom.resName = atom.getGroup3(true); + curAtom.resNumber = atom.getResno(); + curAtom.ss = getSecondayStructure(atom.group + .getProteinStructureSubType()); + curAtom.occupancy = ms.occupancies != null ? ms.occupancies[atom + .getIndex()] : Float.valueOf(atom.getOccupancy100()); + curAtom.resNumIns = "" + curAtom.resNumber + curAtom.insCode; + // curAtom.tfactor = atom.group.; + curAtom.type = 0; + significantAtoms.add(curAtom); + } } + return significantAtoms; + } - switch (proteinStructureSubType) + private void createAnnotation(SequenceI sequence, PDBChain chain, + org.jmol.modelset.Atom[] jmolAtoms) + { + char[] secstr = new char[sequence.getLength()]; + char[] secstrcode = new char[sequence.getLength()]; + for (Residue residue : chain.residues) { - case HELIX310: - case HELIXALPHA: - case HELIXPI: - case HELIX: - secstrcode[pos] = 'H'; - break; - case SHEET: - secstrcode[pos] = 'E'; - break; - default: - secstrcode[pos] = 0; + } + addSecondaryStructureAnnotation(chain.pdbid, sequence, secstr, + secstrcode, chain.id, sequence.getStart()); } /** - * Convert any non-standard peptide codes to their standard code table - * equivalent. (Initial version only does Selenomethionine MSE->MET.) + * Process the Jmol BioPolymer array and generate a Jalview sequence for each + * chain found (including any secondary structure annotation from DSSP) * - * @param threeLetterCode - * @param seq - * @param pos + * @param ms + * @throws IOException */ - protected void replaceNonCanonicalResidue(String threeLetterCode, - char[] seq, int pos) + public void parseBiopolymer(ModelSet ms) throws IOException { - String canonical = ResidueProperties - .getCanonicalAminoAcid(threeLetterCode); - if (canonical != null && !canonical.equalsIgnoreCase(threeLetterCode)) + int modelIndex = -1; + for (Model model : ms.am) { - seq[pos] = ResidueProperties.getSingleCharacterCode(canonical); + modelIndex++; + String modelTitle = (String) ms.getInfo(modelIndex, "title"); + /* + * Chains can span BioPolymers, so first make a flattened list, and then + * work out the lengths of chains present + */ + List monomers = getMonomers(ms, (BioModel) model); + List chainLengths = getChainLengths(monomers); + + /* + * now chop up the Monomer list to make Jalview Sequences + */ + int from = 0; + for (int length : chainLengths) + { + buildSequenceFromChain(monomers.subList(from, from + length), + modelTitle); + from += length; + } } } /** - * Not implemented - returns null + * Returns a flattened list of Monomer (residues) in order, across all + * BioPolymers in the model. This simplifies assembling chains which span + * BioPolymers. The result omits any alternate residues reported for the same + * sequence position (RESNUM value). + * + * @param ms + * @param model + * @return */ - @Override - public String print() + protected List getMonomers(ModelSet ms, BioModel model) { - return null; + List result = new ArrayList(); + int lastResNo = Integer.MIN_VALUE; + + for (BioPolymer bp : model.bioPolymers) + { + for (int groupLeadAtoms : bp.getLeadAtomIndices()) + { + Group group = ms.at[groupLeadAtoms].group; + if (group instanceof Monomer) + { + /* + * ignore alternate residue at same position example: 1ejg has + * residues A:LEU, B:ILE at RESNUM=25 + */ + int resNo = group.getResno(); + if (lastResNo != resNo) + { + result.add((Monomer) group); + } + lastResNo = resNo; + } + } + } + return result; } /** - * Not implemented + * Scans the list of Monomers (residue models), inspecting the chain id for + * each, and returns an array whose length is the number of chains, and values + * the length of each chain + * + * @param monomers + * @return */ - @Override - public void setCallbackFunction(String callbackType, - String callbackFunction) + protected List getChainLengths(List monomers) { - } + List chainLengths = new ArrayList(); + int lastChainId = -1; + int length = 0; - @Override - public void notifyCallback(CBK cbType, Object[] data) - { - String strInfo = (data == null || data[1] == null ? null : data[1] - .toString()); - switch (cbType) + for (Monomer monomer : monomers) { - case ECHO: - sendConsoleEcho(strInfo); - break; - case SCRIPT: - notifyScriptTermination((String) data[2], - ((Integer) data[3]).intValue()); - break; - case MEASURE: - String mystatus = (String) data[3]; - if (mystatus.indexOf("Picked") >= 0 - || mystatus.indexOf("Sequence") >= 0) + int chainId = monomer.chain.chainID; + if (chainId != lastChainId && length > 0) { - // Picking mode - sendConsoleMessage(strInfo); + /* + * change of chain - record the length of the last one + */ + chainLengths.add(length); + length = 0; } - else if (mystatus.indexOf("Completed") >= 0) - { - sendConsoleEcho(strInfo.substring(strInfo.lastIndexOf(",") + 2, - strInfo.length() - 1)); - } - break; - case MESSAGE: - sendConsoleMessage(data == null ? null : strInfo); - break; - case PICK: - sendConsoleMessage(strInfo); - break; - default: - break; + lastChainId = chainId; + length++; + } + if (length > 0) + { + /* + * record the length of the final chain + */ + chainLengths.add(length); } - } - - String lastConsoleEcho = ""; - private void sendConsoleEcho(String string) - { - lastConsoleEcho += string; - lastConsoleEcho += "\n"; + return chainLengths; } - String lastConsoleMessage = ""; - - private void sendConsoleMessage(String string) + /** + * Helper method to construct a sequence for one chain and add it to the seqs + * list + * + * @param monomers + * a list of all monomers in the chain + * @param modelTitle + */ + protected void buildSequenceFromChain(List monomers, + String modelTitle) { - lastConsoleMessage += string; - lastConsoleMessage += "\n"; - } - - int lastScriptTermination = -1; + final int length = monomers.size(); - String lastScriptMessage = ""; + /* + * arrays to hold sequence and secondary structure + */ + char[] seq = new char[length]; + char[] secstr = new char[length]; + char[] secstrcode = new char[length]; - private void notifyScriptTermination(String string, int intValue) - { - lastScriptMessage += string; - lastScriptMessage += "\n"; - lastScriptTermination = intValue; - } + /* + * populate the sequence and secondary structure arrays + */ + extractJmolChainData(monomers, seq, secstr, secstrcode); - @Override - public boolean notifyEnabled(CBK callbackPick) - { - switch (callbackPick) + /* + * grab chain code and start position from first residue; + */ + String chainId = monomers.get(0).chain.getIDStr(); + int firstResNum = monomers.get(0).getResno(); + if (firstResNum < 1) { - case MESSAGE: - case SCRIPT: - case ECHO: - case LOADSTRUCT: - case ERROR: - return true; - default: - return false; + // Jalview doesn't like residue < 1, so force this to 1 + System.err.println("Converting chain " + chainId + " first RESNUM (" + + firstResNum + ") to 1"); + firstResNum = 1; } - } - /** - * Not implemented - returns null - */ - @Override - public String eval(String strEval) - { - return null; - } + /* + * convert any non-gap unknown residues to 'X' + */ + convertNonGapCharacters(seq); - /** - * Not implemented - returns null - */ - @Override - public float[][] functionXY(String functionName, int x, int y) - { - return null; - } + /* + * construct and add the Jalview sequence + */ + String seqName = "" + modelTitle + "|" + chainId; + int start = firstResNum; + int end = firstResNum + length - 1; - /** - * Not implemented - returns null - */ - @Override - public float[][][] functionXYZ(String functionName, int nx, int ny, int nz) - { - return null; - } + SequenceI sq = new Sequence(seqName, seq, start, end); - /** - * Not implemented - returns null - */ - @Override - public String createImage(String fileName, String imageType, - Object text_or_bytes, int quality) - { - return null; - } + addPdbid(sq, modelTitle, chainId); - /** - * Not implemented - returns null - */ - @Override - public Map getRegistryInfo() - { - return null; - } + addSourceDBref(sq, modelTitle, start, end); - /** - * Not implemented - */ - @Override - public void showUrl(String url) - { - } + seqs.add(sq); - /** - * Not implemented - returns null - */ - @Override - public Dimension resizeInnerPanel(String data) - { - return null; - } + /* + * add secondary structure predictions (if any) + */ + addSecondaryStructureAnnotation(modelTitle, sq, secstr, secstrcode, + chainId, firstResNum); - @Override - public Map getJSpecViewProperty(String arg0) - { - return null; } /** - * Calls the Jmol library to parse the PDB file, and then inspects the - * resulting object model to generate Jalview-style sequences, with secondary - * structure annotation added where available (i.e. where it has been computed - * by Jmol using DSSP). + * Scans the list of (Jmol) Monomer objects, and adds the residue for each to + * the sequence array, and any converted secondary structure prediction to the + * secondary structure arrays * - * @see jalview.io.AlignFile#parse() + * @param monomers + * @param seq + * @param secstr + * @param secstrcode */ - @Override - public void parse() throws IOException + protected void extractJmolChainData(List monomers, char[] seq, + char[] secstr, char[] secstrcode) { + int pos = 0; + for (Monomer monomer : monomers) + { + seq[pos] = monomer.getGroup1(); - setChains(new Vector()); - Viewer jmolModel = getJmolData(); - jmolModel.openReader(getDataName(), getDataName(), getReader()); - waitForScript(jmolModel); + /* + * JAL-1828 replace a modified amino acid with its standard equivalent + * (e.g. MSE with MET->M) to maximise sequence matching + */ + replaceNonCanonicalResidue(monomer.getGroup3(), seq, pos); - /* - * Convert one or more Jmol Model objects to Jalview sequences - */ - if (jmolModel.ms.mc > 0) - { - // parseBiopolymer(jmolModel.ms); - transformJmolModelToJalview(jmolModel.ms); + /* + * if Jmol has derived a secondary structure prediction for this position, + * convert it to Jalview equivalent and save it + */ + setSecondaryStructure(monomer.getProteinStructureSubType(), pos, + secstr, secstrcode); + pos++; } } /** - * Process the Jmol BioPolymer array and generate a Jalview sequence for each - * chain found (including any secondary structure annotation from DSSP) + * Replace any non-gap miscellaneous characters with 'X' * - * @param ms - * @throws IOException + * @param seq + * @return */ - public void parseBiopolymer(ModelSet ms) throws IOException + protected void convertNonGapCharacters(char[] seq) { - int modelIndex = -1; - for (Model model : ms.am) - { - modelIndex++; - String modelTitle = (String) ms.getInfo(modelIndex, "title"); - /* - * Chains can span BioPolymers, so first make a flattened list, and then - * work out the lengths of chains present - */ - List monomers = getMonomers(ms, (BioModel) model); - List chainLengths = getChainLengths(monomers); + boolean isNa = Comparison.areNucleotide(new char[][] { seq }); + int[] cinds = isNa ? ResidueProperties.nucleotideIndex + : ResidueProperties.aaIndex; + int nonGap = isNa ? ResidueProperties.maxNucleotideIndex + : ResidueProperties.maxProteinIndex; - /* - * now chop up the Monomer list to make Jalview Sequences - */ - int from = 0; - for (int length : chainLengths) + for (int p = 0; p < seq.length; p++) + { + if (cinds[seq[p]] == nonGap) { - buildSequenceFromChain(monomers.subList(from, from + length), - modelTitle); - from += length; + seq[p] = 'X'; } } } - public void transformJmolModelToJalview(ModelSet ms) + /** + * Add a source db ref entry for the given sequence. + * + * @param sq + * @param accessionId + * @param start + * @param end + */ + protected void addSourceDBref(SequenceI sq, String accessionId, + int start, int end) { - try - { - String lastID = ""; - List rna = new ArrayList(); - List prot = new ArrayList(); - PDBChain tmpchain; - String pdbId = (String) ms.getInfo(0, "title"); - setId(pdbId); - List significantAtoms = convertSignificantAtoms(ms); - for (Atom tmpatom : significantAtoms) + DBRefEntry sourceDBRef = new DBRefEntry(); + sourceDBRef.setAccessionId(accessionId); + sourceDBRef.setSource(DBRefSource.MMCIF); + sourceDBRef.setStartRes(start); + sourceDBRef.setEndRes(end); + sq.setSourceDBRef(sourceDBRef); + sq.addDBRef(sourceDBRef); + } + + /** + * Add a PDBEntry giving the source of PDB data to the sequence + * + * @param sq + * @param id + * @param chainId + */ + protected void addPdbid(SequenceI sq, String id, String chainId) + { + PDBEntry entry = new PDBEntry(); + entry.setId(id); + entry.setType(PDBEntry.Type.MMCIF); + entry.setProperty(new Hashtable()); + if (chainId != null) + { + // entry.getProperty().put("CHAIN", chains.elementAt(i).id); + entry.setChainCode(String.valueOf(chainId)); + } + if (inFile != null) + { + entry.setFile(inFile.getAbsolutePath()); + } + else + { + // TODO: decide if we should dump the datasource to disk + entry.setFile(getDataName()); + } + + sq.addPDBId(entry); + } + + + /** + * Helper method that adds an AlignmentAnnotation for secondary structure to + * the sequence, provided at least one secondary structure prediction has been + * made + * + * @param modelTitle + * @param seq + * @param secstr + * @param secstrcode + * @param chainId + * @param firstResNum + * @return + */ + protected void addSecondaryStructureAnnotation(String modelTitle, + SequenceI sq, char[] secstr, char[] secstrcode, String chainId, + int firstResNum) + { + char[] seq = sq.getSequence(); + boolean ssFound = false; + Annotation asecstr[] = new Annotation[seq.length + firstResNum - 1]; + for (int p = 0; p < seq.length; p++) + { + if (secstr[p] >= 'A' && secstr[p] <= 'z') { - try - { - tmpchain = findChain(tmpatom.chain); - if (tmpatom.resNumIns.trim().equals(lastID)) - { - // phosphorylated protein - seen both CA and P.. - continue; - } - tmpchain.atoms.addElement(tmpatom); - } catch (Exception e) - { - tmpchain = new PDBChain(pdbId, tmpatom.chain); - getChains().add(tmpchain); - tmpchain.atoms.addElement(tmpatom); - } - lastID = tmpatom.resNumIns.trim(); + asecstr[p] = new Annotation(String.valueOf(secstr[p]), null, + secstrcode[p], Float.NaN); + ssFound = true; } - makeResidueList(); - makeCaBondList(); + } - if (getId() == null) + if (ssFound) + { + String mt = modelTitle == null ? getDataName() : modelTitle; + mt += chainId; + AlignmentAnnotation ann = new AlignmentAnnotation( + "Secondary Structure", "Secondary Structure for " + mt, + asecstr); + ann.belowAlignment = true; + ann.visible = true; + ann.autoCalculated = false; + ann.setCalcId(getClass().getName()); + ann.adjustForAlignment(); + ann.validateRangeAndDisplay(); + annotations.add(ann); + sq.addAlignmentAnnotation(ann); + } + } + + private void waitForScript(Viewer jmd) + { + while (jmd.isScriptExecuting()) + { + try { - setId(inFile.getName()); - } - for (PDBChain chain : getChains()) + Thread.sleep(50); + + } catch (InterruptedException x) { - SequenceI chainseq = postProcessChain(chain); - if (isRNA(chainseq)) - { - rna.add(chainseq); - } - else - { - prot.add(chainseq); - } } - } catch (OutOfMemoryError er) + } + } + + /** + * Convert Jmol's secondary structure code to Jalview's, and stored it in the + * secondary structure arrays at the given sequence position + * + * @param proteinStructureSubType + * @param pos + * @param secstr + * @param secstrcode + */ + protected void setSecondaryStructure(STR proteinStructureSubType, + int pos, char[] secstr, char[] secstrcode) + { + switch (proteinStructureSubType) { - System.out - .println("OUT OF MEMORY LOADING TRANSFORMING JMOL MODEL TO JALVIEW MODEL"); - // throw new IOException( - // MessageManager - // .getString("exception.outofmemory_loading_pdb_file")); + case HELIX310: + secstr[pos] = '3'; + break; + case HELIX: + case HELIXALPHA: + secstr[pos] = 'H'; + break; + case HELIXPI: + secstr[pos] = 'P'; + break; + case SHEET: + secstr[pos] = 'E'; + break; + default: + secstr[pos] = 0; + } + + switch (proteinStructureSubType) + { + case HELIX310: + case HELIXALPHA: + case HELIXPI: + case HELIX: + secstrcode[pos] = 'H'; + break; + case SHEET: + secstrcode[pos] = 'E'; + break; + default: + secstrcode[pos] = 0; } } - private List convertSignificantAtoms(ModelSet ms) + private char getSecondayStructure(STR proteinStructureSubType) { - List significantAtoms = new ArrayList(); - for (org.jmol.modelset.Atom atom : ms.at) + switch (proteinStructureSubType) { - if (atom.getAtomName().equalsIgnoreCase("CA") - || atom.getAtomName().equalsIgnoreCase("P")) - { - Atom curAtom = new Atom(atom.x, atom.y, atom.z); - curAtom.atomIndex = atom.getIndex(); - curAtom.chain = atom.getChainIDStr(); - curAtom.insCode = atom.group.getInsertionCode(); - curAtom.name = atom.getAtomName(); - curAtom.number = atom.getAtomNumber(); - curAtom.resName = atom.getGroup3(true); - curAtom.resNumber = atom.getResno(); - curAtom.occupancy = ms.occupancies != null ? ms.occupancies[atom - .getIndex()] : Float.valueOf(atom.getOccupancy100()); - curAtom.resNumIns = "" + curAtom.resNumber + curAtom.insCode; - curAtom.tfactor = 0; - curAtom.type = 0; - significantAtoms.add(curAtom); - } + case HELIX310: + return '3'; + case HELIX: + case HELIXALPHA: + return 'H'; + case HELIXPI: + return 'P'; + case SHEET: + return 'E'; + default: + return 0; } - return significantAtoms; } /** - * Helper method to construct a sequence for one chain and add it to the seqs - * list + * Convert any non-standard peptide codes to their standard code table + * equivalent. (Initial version only does Selenomethionine MSE->MET.) * - * @param monomers - * a list of all monomers in the chain - * @param modelTitle + * @param threeLetterCode + * @param seq + * @param pos */ - protected void buildSequenceFromChain(List monomers, - String modelTitle) + protected void replaceNonCanonicalResidue(String threeLetterCode, + char[] seq, int pos) { - final int length = monomers.size(); + String canonical = ResidueProperties + .getCanonicalAminoAcid(threeLetterCode); + if (canonical != null && !canonical.equalsIgnoreCase(threeLetterCode)) + { + seq[pos] = ResidueProperties.getSingleCharacterCode(canonical); + } + } - /* - * arrays to hold sequence and secondary structure - */ - char[] seq = new char[length]; - char[] secstr = new char[length]; - char[] secstrcode = new char[length]; + /** + * Not implemented - returns null + */ + @Override + public String print() + { + return null; + } - /* - * populate the sequence and secondary structure arrays - */ - extractJmolChainData(monomers, seq, secstr, secstrcode); + /** + * Not implemented + */ + @Override + public void setCallbackFunction(String callbackType, + String callbackFunction) + { + } - /* - * grab chain code and start position from first residue; - */ - String chainId = monomers.get(0).chain.getIDStr(); - int firstResNum = monomers.get(0).getResno(); - if (firstResNum < 1) + @Override + public void notifyCallback(CBK cbType, Object[] data) + { + String strInfo = (data == null || data[1] == null ? null : data[1] + .toString()); + switch (cbType) { - // Jalview doesn't like residue < 1, so force this to 1 - System.err.println("Converting chain " + chainId + " first RESNUM (" - + firstResNum + ") to 1"); - firstResNum = 1; + case ECHO: + sendConsoleEcho(strInfo); + break; + case SCRIPT: + notifyScriptTermination((String) data[2], + ((Integer) data[3]).intValue()); + break; + case MEASURE: + String mystatus = (String) data[3]; + if (mystatus.indexOf("Picked") >= 0 + || mystatus.indexOf("Sequence") >= 0) + { + // Picking mode + sendConsoleMessage(strInfo); + } + else if (mystatus.indexOf("Completed") >= 0) + { + sendConsoleEcho(strInfo.substring(strInfo.lastIndexOf(",") + 2, + strInfo.length() - 1)); + } + break; + case MESSAGE: + sendConsoleMessage(data == null ? null : strInfo); + break; + case PICK: + sendConsoleMessage(strInfo); + break; + default: + break; } + } - /* - * convert any non-gap unknown residues to 'X' - */ - convertNonGapCharacters(seq); + String lastConsoleEcho = ""; - /* - * construct and add the Jalview sequence - */ - String seqName = "" + modelTitle + "|" + chainId; - int start = firstResNum; - int end = firstResNum + length - 1; + private void sendConsoleEcho(String string) + { + lastConsoleEcho += string; + lastConsoleEcho += "\n"; + } - SequenceI sq = new Sequence(seqName, seq, start, end); + String lastConsoleMessage = ""; - addPdbid(sq, modelTitle, chainId); + private void sendConsoleMessage(String string) + { + lastConsoleMessage += string; + lastConsoleMessage += "\n"; + } - addSourceDBref(sq, modelTitle, start, end); + int lastScriptTermination = -1; + + String lastScriptMessage = ""; - seqs.add(sq); + private void notifyScriptTermination(String string, int intValue) + { + lastScriptMessage += string; + lastScriptMessage += "\n"; + lastScriptTermination = intValue; + } - /* - * add secondary structure predictions (if any) - */ - if (isPredictSecondaryStructure()) + @Override + public boolean notifyEnabled(CBK callbackPick) + { + switch (callbackPick) { - addSecondaryStructureAnnotation(modelTitle, sq, secstr, secstrcode, - chainId, firstResNum); + case MESSAGE: + case SCRIPT: + case ECHO: + case LOADSTRUCT: + case ERROR: + return true; + default: + return false; } - } /** - * Add a source db ref entry for the given sequence. - * - * @param sq - * @param accessionId - * @param start - * @param end + * Not implemented - returns null */ - protected void addSourceDBref(SequenceI sq, String accessionId, - int start, int end) + @Override + public String eval(String strEval) { - DBRefEntry sourceDBRef = new DBRefEntry(); - sourceDBRef.setAccessionId(accessionId); - sourceDBRef.setSource(DBRefSource.MMCIF); - sourceDBRef.setStartRes(start); - sourceDBRef.setEndRes(end); - sq.setSourceDBRef(sourceDBRef); - sq.addDBRef(sourceDBRef); + return null; } /** - * Add a PDBEntry giving the source of PDB data to the sequence - * - * @param sq - * @param id - * @param chainId + * Not implemented - returns null */ - protected void addPdbid(SequenceI sq, String id, String chainId) + @Override + public float[][] functionXY(String functionName, int x, int y) { - PDBEntry entry = new PDBEntry(); - entry.setId(id); - entry.setType(PDBEntry.Type.MMCIF); - entry.setProperty(new Hashtable()); - if (chainId != null) - { - // entry.getProperty().put("CHAIN", chains.elementAt(i).id); - entry.setChainCode(String.valueOf(chainId)); - } - if (inFile != null) - { - entry.setFile(inFile.getAbsolutePath()); - } - else - { - // TODO: decide if we should dump the datasource to disk - entry.setFile(getDataName()); - } - - sq.addPDBId(entry); + return null; } /** - * Scans the list of (Jmol) Monomer objects, and adds the residue for each to - * the sequence array, and any converted secondary structure prediction to the - * secondary structure arrays - * - * @param monomers - * @param seq - * @param secstr - * @param secstrcode + * Not implemented - returns null */ - protected void extractJmolChainData(List monomers, char[] seq, - char[] secstr, char[] secstrcode) + @Override + public float[][][] functionXYZ(String functionName, int nx, int ny, int nz) { - int pos = 0; - for (Monomer monomer : monomers) - { - seq[pos] = monomer.getGroup1(); - - /* - * JAL-1828 replace a modified amino acid with its standard equivalent - * (e.g. MSE with MET->M) to maximise sequence matching - */ - replaceNonCanonicalResidue(monomer.getGroup3(), seq, pos); - - /* - * if Jmol has derived a secondary structure prediction for this position, - * convert it to Jalview equivalent and save it - */ - setSecondaryStructure(monomer.getProteinStructureSubType(), pos, - secstr, secstrcode); - pos++; - } + return null; } /** - * Helper method that adds an AlignmentAnnotation for secondary structure to - * the sequence, provided at least one secondary structure prediction has been - * made - * - * @param modelTitle - * @param seq - * @param secstr - * @param secstrcode - * @param chainId - * @param firstResNum - * @return + * Not implemented - returns null */ - protected void addSecondaryStructureAnnotation(String modelTitle, - SequenceI sq, char[] secstr, char[] secstrcode, String chainId, - int firstResNum) + @Override + public String createImage(String fileName, String imageType, + Object text_or_bytes, int quality) { - char[] seq = sq.getSequence(); - boolean ssFound = false; - Annotation asecstr[] = new Annotation[seq.length + firstResNum - 1]; - for (int p = 0; p < seq.length; p++) - { - if (secstr[p] >= 'A' && secstr[p] <= 'z') - { - asecstr[p] = new Annotation(String.valueOf(secstr[p]), null, - secstrcode[p], Float.NaN); - ssFound = true; - } - } - - if (ssFound) - { - String mt = modelTitle == null ? getDataName() : modelTitle; - mt += chainId; - AlignmentAnnotation ann = new AlignmentAnnotation( - "Secondary Structure", "Secondary Structure for " + mt, - asecstr); - ann.belowAlignment = true; - ann.visible = true; - ann.autoCalculated = false; - ann.setCalcId(getClass().getName()); - ann.adjustForAlignment(); - ann.validateRangeAndDisplay(); - annotations.add(ann); - sq.addAlignmentAnnotation(ann); - } + return null; } /** - * Replace any non-gap miscellaneous characters with 'X' - * - * @param seq - * @return + * Not implemented - returns null */ - protected void convertNonGapCharacters(char[] seq) + @Override + public Map getRegistryInfo() { - boolean isNa = Comparison.areNucleotide(new char[][] { seq }); - int[] cinds = isNa ? ResidueProperties.nucleotideIndex - : ResidueProperties.aaIndex; - int nonGap = isNa ? ResidueProperties.maxNucleotideIndex - : ResidueProperties.maxProteinIndex; - - for (int p = 0; p < seq.length; p++) - { - if (cinds[seq[p]] == nonGap) - { - seq[p] = 'X'; - } - } + return null; } /** - * Scans the list of Monomers (residue models), inspecting the chain id for - * each, and returns an array whose length is the number of chains, and values - * the length of each chain - * - * @param monomers - * @return + * Not implemented */ - protected List getChainLengths(List monomers) + @Override + public void showUrl(String url) { - List chainLengths = new ArrayList(); - int lastChainId = -1; - int length = 0; - - for (Monomer monomer : monomers) - { - int chainId = monomer.chain.chainID; - if (chainId != lastChainId && length > 0) - { - /* - * change of chain - record the length of the last one - */ - chainLengths.add(length); - length = 0; - } - lastChainId = chainId; - length++; - } - if (length > 0) - { - /* - * record the length of the final chain - */ - chainLengths.add(length); - } - - return chainLengths; } /** - * Returns a flattened list of Monomer (residues) in order, across all - * BioPolymers in the model. This simplifies assembling chains which span - * BioPolymers. The result omits any alternate residues reported for the same - * sequence position (RESNUM value). - * - * @param ms - * @param model - * @return + * Not implemented - returns null */ - protected List getMonomers(ModelSet ms, BioModel model) + @Override + public Dimension resizeInnerPanel(String data) { - List result = new ArrayList(); - int lastResNo = Integer.MIN_VALUE; + return null; + } - for (BioPolymer bp : model.bioPolymers) - { - for (int groupLeadAtoms : bp.getLeadAtomIndices()) - { - Group group = ms.at[groupLeadAtoms].group; - if (group instanceof Monomer) - { - /* - * ignore alternate residue at same position example: 1ejg has - * residues A:LEU, B:ILE at RESNUM=25 - */ - int resNo = group.getResno(); - if (lastResNo != resNo) - { - result.add((Monomer) group); - } - lastResNo = resNo; - } - } - } - return result; + @Override + public Map getJSpecViewProperty(String arg0) + { + return null; } public boolean isPredictSecondaryStructure() diff --git a/src/jalview/io/StructureFile.java b/src/jalview/io/StructureFile.java index d4c2d7f..e2b73d1 100644 --- a/src/jalview/io/StructureFile.java +++ b/src/jalview/io/StructureFile.java @@ -1,6 +1,7 @@ package jalview.io; import jalview.analysis.AlignSeq; +import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; @@ -10,6 +11,7 @@ import jalview.datamodel.SequenceI; import java.awt.Color; import java.io.IOException; +import java.lang.reflect.Constructor; import java.util.Hashtable; import java.util.List; import java.util.Vector; @@ -66,6 +68,7 @@ public abstract class StructureFile extends AlignFile { } + @SuppressWarnings("rawtypes") protected SequenceI postProcessChain(PDBChain chain) { SequenceI pdbSequence = chain.sequence; @@ -94,11 +97,15 @@ public abstract class StructureFile extends AlignFile sourceDBRef.setEndRes(pdbSequence.getEnd()); // PDBChain objects maintain reference to dataset - SequenceI chainseq = pdbSequence.deriveSequence(); - chainseq.setSourceDBRef(sourceDBRef); + + // SequenceI chainseq = pdbSequence.deriveSequence(); + // chainseq.setSourceDBRef(sourceDBRef); + // chainseq.addPDBId(entry); + // chainseq.addDBRef(sourceDBRef); + SequenceI chainseq = chain.sequence; chainseq.addPDBId(entry); + chainseq.setSourceDBRef(sourceDBRef); chainseq.addDBRef(sourceDBRef); - seqs.addElement(chainseq); AlignmentAnnotation[] chainannot = chainseq.getAnnotation(); @@ -114,6 +121,7 @@ public abstract class StructureFile extends AlignFile return chainseq; } + @SuppressWarnings({ "unchecked", "rawtypes" }) protected void processPdbFileWithAnnotate3d(List rna) throws Exception { @@ -158,6 +166,7 @@ public abstract class StructureFile extends AlignFile } } + @SuppressWarnings("unchecked") protected void replaceAndUpdateChains(List prot, AlignmentI al, String pep, boolean b) @@ -188,6 +197,87 @@ public abstract class StructureFile extends AlignFile } } + /** + * Predict secondary structure for RNA and/or protein sequences and add as + * annotations + * + * @param rnaSequences + * @param proteinSequences + */ + protected void addSecondaryStructure(List rnaSequences, + List proteinSequences) + { + /* + * Currently using Annotate3D for RNA, but only if the 'use external + * prediction' flag is set + */ + if (externalSecondaryStructure && rnaSequences.size() > 0) + { + try + { + processPdbFileWithAnnotate3d(rnaSequences); + } catch (Exception x) + { + System.err.println("Exceptions when dealing with RNA in pdb file"); + x.printStackTrace(); + + } + } + + /* + * Currently using JMol PDB parser for peptide + */ + if (proteinSequences.size() > 0) + { + try + { + processWithJmolParser(proteinSequences); + } catch (Exception x) + { + System.err + .println("Exceptions from Jmol when processing data in pdb file"); + x.printStackTrace(); + } + } + } + + @SuppressWarnings({ "unchecked", "rawtypes" }) + private void processWithJmolParser(List prot) + throws Exception + { + try + { + + Class cl = Class.forName("jalview.ext.jmol.JmolParser"); + if (cl != null) + { + final Constructor constructor = cl + .getConstructor(new Class[] { FileParse.class }); + final Object[] args = new Object[] { new FileParse(getDataName(), + type) }; + Object jmf = constructor.newInstance(args); + AlignmentI al = new Alignment((SequenceI[]) cl.getMethod( + "getSeqsAsArray", new Class[] {}).invoke(jmf)); + cl.getMethod("addAnnotations", new Class[] { AlignmentI.class }) + .invoke(jmf, al); + for (SequenceI sq : al.getSequences()) + { + if (sq.getDatasetSequence() != null) + { + sq.getDatasetSequence().getAllPDBEntries().clear(); + } + else + { + sq.getAllPDBEntries().clear(); + } + } + replaceAndUpdateChains(prot, al, AlignSeq.PEP, false); + } + } catch (ClassNotFoundException q) + { + } + } + public PDBChain findChain(String id) throws Exception { for (PDBChain chain : getChains())