- /**
- * Calls the Jmol library to parse the PDB file, and then inspects the
- * resulting object model to generate Jalview-style sequences, with secondary
- * structure annotation added where available (i.e. where it has been computed
- * by Jmol using DSSP).
- *
- * @see jalview.io.AlignFile#parse()
- */
- @Override
- public void parse() throws IOException
- {
-
- chains = new ArrayList<PDBChain>();
- Viewer jmolModel = getJmolData();
- jmolModel.openReader(getDataName(), getDataName(), getReader());
- waitForScript(jmolModel);
-
- /*
- * Convert one or more Jmol Model objects to Jalview sequences
- */
- if (jmolModel.ms.mc > 0)
- {
- parseBiopolymers(jmolModel.ms);
- }
- }
-
- /**
- * Process the Jmol BioPolymer array and generate a Jalview sequence for each
- * chain found (including any secondary structure annotation from DSSP)
- *
- * @param ms
- * @throws IOException
- */
- public void parseBiopolymers(ModelSet ms) throws IOException
- {
- int modelIndex = -1;
- for (Model model : ms.am)
- {
- modelIndex++;
- String modelTitle = (String) ms.getInfo(modelIndex, "title");
-
- /*
- * Chains can span BioPolymers, so first make a flattened list,
- * and then work out the lengths of chains present
- */
- List<Monomer> monomers = getMonomers(ms, (BioModel) model);
- List<Integer> chainLengths = getChainLengths(monomers);
-
- /*
- * now chop up the Monomer list to make Jalview Sequences
- */
- int from = 0;
- for (int length : chainLengths)
- {
- buildSequenceFromChain(monomers.subList(from, from + length), modelTitle);
- from += length;
- }
- }
- }
-
- /**
- * Helper method to construct a sequence for one chain and add it to the seqs
- * list
- *
- * @param monomers
- * a list of all monomers in the chain
- * @param modelTitle
- */
- protected void buildSequenceFromChain(List<Monomer> monomers, String modelTitle)
- {
- final int length = monomers.size();
-
- /*
- * arrays to hold sequence and secondary structure
- */
- char[] seq = new char[length];
- char[] secstr = new char[length];
- char[] secstrcode = new char[length];
-
- /*
- * populate the sequence and secondary structure arrays
- */
- extractJmolChainData(monomers, seq, secstr, secstrcode);
-
- /*
- * grab chain code and start position from first residue;
- */
- String chainId = monomers.get(0).chain.getIDStr();
- int firstResNum = monomers.get(0).getResno();
- if (firstResNum < 1)
- {
- // Jalview doesn't like residue < 1, so force this to 1
- System.err.println("Converting chain " + chainId + " first RESNUM ("
- + firstResNum + ") to 1");
- firstResNum = 1;
- }
-
- /*
- * convert any non-gap unknown residues to 'X'
- */
- convertNonGapCharacters(seq);
-
- /*
- * construct and add the Jalview sequence
- */
- String seqName = "" + modelTitle + "|"
- + chainId;
- int start = firstResNum;
- int end = firstResNum + length - 1;
-
- SequenceI sq = new Sequence(seqName, seq, start, end);
-
- addPdbid(sq, modelTitle, chainId);
-
- addSourceDBref(sq, modelTitle, start, end);
-
- seqs.add(sq);
-
- addChainMetaData(sq, monomers, chainId);
-
- /*
- * add secondary structure predictions (if any)
- */
- if (isPredictSecondaryStructure())
- {
- addSecondaryStructureAnnotation(modelTitle, sq, secstr, secstrcode,
- chainId, firstResNum);
- }
-
- }
-
- public void addChainMetaData(SequenceI sq, List<Monomer> monomers,
- String chainId)
- {
- for (char res : sq.getSequence())
- {
-
- }
- }
-
- /**
- * Add a source db ref entry for the given sequence.
- *
- * @param sq
- * @param accessionId
- * @param start
- * @param end
- */
- protected void addSourceDBref(SequenceI sq, String accessionId,
- int start, int end)
- {
- DBRefEntry sourceDBRef = new DBRefEntry();
- sourceDBRef.setAccessionId(accessionId);
- sourceDBRef.setSource(DBRefSource.MMCIF);
- sourceDBRef.setStartRes(start);
- sourceDBRef.setEndRes(end);
- sq.setSourceDBRef(sourceDBRef);
- sq.addDBRef(sourceDBRef);
- }
-
-
- /**
- * Add a PDBEntry giving the source of PDB data to the sequence
- *
- * @param sq
- * @param id
- * @param chainId
- */
- protected void addPdbid(SequenceI sq, String id, String chainId)
- {
- PDBEntry entry = new PDBEntry();
- entry.setId(id);
- entry.setType(PDBEntry.Type.MMCIF);
- entry.setProperty(new Hashtable());
- if (chainId != null)
- {
- // entry.getProperty().put("CHAIN", chains.elementAt(i).id);
- entry.setChainCode(String.valueOf(chainId));
- }
- if (inFile != null)
- {
- entry.setFile(inFile.getAbsolutePath());
- }
- else
- {
- // TODO: decide if we should dump the datasource to disk
- entry.setFile(getDataName());
- }
-
- sq.addPDBId(entry);
- }
-
- /**
- * Scans the list of (Jmol) Monomer objects, and adds the residue for each to
- * the sequence array, and any converted secondary structure prediction to the
- * secondary structure arrays
- *
- * @param monomers
- * @param seq
- * @param secstr
- * @param secstrcode
- */
- protected void extractJmolChainData(List<Monomer> monomers, char[] seq,
- char[] secstr, char[] secstrcode)
- {
- int pos = 0;
- for (Monomer monomer : monomers)
- {
- seq[pos] = monomer.getGroup1();
-
- /*
- * JAL-1828 replace a modified amino acid with its standard
- * equivalent (e.g. MSE with MET->M) to maximise sequence matching
- */
- replaceNonCanonicalResidue(monomer.getGroup3(), seq, pos);
-
- /*
- * if Jmol has derived a secondary structure prediction for
- * this position, convert it to Jalview equivalent and save it
- */
- setSecondaryStructure(monomer.getProteinStructureSubType(), pos,
- secstr, secstrcode);
- pos++;
- }
- }
-
- /**
- * Helper method that adds an AlignmentAnnotation for secondary structure to
- * the sequence, provided at least one secondary structure prediction has been
- * made
- *
- * @param modelTitle
- * @param seq
- * @param secstr
- * @param secstrcode
- * @param chainId
- * @param firstResNum
- * @return
- */
- protected void addSecondaryStructureAnnotation(String modelTitle,
- SequenceI sq, char[] secstr, char[] secstrcode,
- String chainId, int firstResNum)
- {
- char[] seq = sq.getSequence();
- boolean ssFound = false;
- Annotation asecstr[] = new Annotation[seq.length + firstResNum - 1];
- for (int p = 0; p < seq.length; p++)
- {
- if (secstr[p] >= 'A' && secstr[p] <= 'z')
- {
- asecstr[p] = new Annotation(String.valueOf(secstr[p]), null,
- secstrcode[p], Float.NaN);
- ssFound = true;
- }
- }
-
- if (ssFound)
- {
- String mt = modelTitle == null ? getDataName() : modelTitle;
- mt += chainId;
- AlignmentAnnotation ann = new AlignmentAnnotation(
- "Secondary Structure", "Secondary Structure for " + mt,
- asecstr);
- ann.belowAlignment = true;
- ann.visible = true;
- ann.autoCalculated = false;
- ann.setCalcId(getClass().getName());
- ann.adjustForAlignment();
- ann.validateRangeAndDisplay();
- annotations.add(ann);
- sq.addAlignmentAnnotation(ann);
- }
- }
-
- /**
- * Replace any non-gap miscellaneous characters with 'X'
- *
- * @param seq
- * @return
- */
- protected void convertNonGapCharacters(char[] seq)
- {
- boolean isNa = Comparison.areNucleotide(new char[][] { seq });
- int[] cinds = isNa ? ResidueProperties.nucleotideIndex
- : ResidueProperties.aaIndex;
- int nonGap = isNa ? ResidueProperties.maxNucleotideIndex
- : ResidueProperties.maxProteinIndex;
-
- for (int p = 0; p < seq.length; p++)
- {
- if (cinds[seq[p]] == nonGap)
- {
- seq[p] = 'X';
- }
- }
- }
-
-
- /**
- * Scans the list of Monomers (residue models), inspecting the chain id for
- * each, and returns an array whose length is the number of chains, and values
- * the length of each chain
- *
- * @param monomers
- * @return
- */
- protected List<Integer> getChainLengths(List<Monomer> monomers)
- {
- List<Integer> chainLengths = new ArrayList<Integer>();
- int lastChainId = -1;
- int length = 0;
-
- for (Monomer monomer : monomers)
- {
- int chainId = monomer.chain.chainID;
- if (chainId != lastChainId && length > 0)
- {
- /*
- * change of chain - record the length of the last one
- */
- chainLengths.add(length);
- length = 0;
- }
- lastChainId = chainId;
- length++;
- }
- if (length > 0)
- {
- /*
- * record the length of the final chain
- */
- chainLengths.add(length);
- }
-
- return chainLengths;
- }
-
- /**
- * Returns a flattened list of Monomer (residues) in order, across all
- * BioPolymers in the model. This simplifies assembling chains which span
- * BioPolymers. The result omits any alternate residues reported for the same
- * sequence position (RESNUM value).
- *
- * @param ms
- * @param model
- * @return
- */
- protected List<Monomer> getMonomers(ModelSet ms, BioModel model)
- {
- List<Monomer> result = new ArrayList<Monomer>();
- int lastResNo = Integer.MIN_VALUE;
-
- for (BioPolymer bp : model.bioPolymers)
- {
- for (int groupLeadAtoms : bp.getLeadAtomIndices())
- {
- Group group = ms.at[groupLeadAtoms].group;
- if (group instanceof Monomer)
- {
- /*
- * ignore alternate residue at same position
- * example: 1ejg has residues A:LEU, B:ILE at RESNUM=25
- */
- int resNo = group.getResno();
- if (lastResNo != resNo)
- {
- result.add((Monomer) group);
- }
- lastResNo = resNo;
- }
- }
- }
- return result;
- }
-