X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=9790f7918f89dc9e77fb953cb158f37d36488bef;hb=af259f508805faf2da90585ee9a67cd7853bf5aa;hp=68af7c3f570f36ca3b9ce35b39eb9ee9c89b57f0;hpb=f4766a7bbcfae845fc95923b01fa14ff83d589ff;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 68af7c3..9790f79 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -20,28 +20,6 @@ */ package jalview.ws.sifts; -import jalview.analysis.AlignSeq; -import jalview.analysis.scoremodels.ScoreMatrix; -import jalview.analysis.scoremodels.ScoreModels; -import jalview.api.DBRefEntryI; -import jalview.api.SiftsClientI; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; -import jalview.datamodel.SequenceI; -import jalview.io.StructureFile; -import jalview.schemes.ResidueProperties; -import jalview.structure.StructureMapping; -import jalview.util.Comparison; -import jalview.util.DBRefUtils; -import jalview.util.Format; -import jalview.xml.binding.sifts.Entry; -import jalview.xml.binding.sifts.Entry.Entity; -import jalview.xml.binding.sifts.Entry.Entity.Segment; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; -import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; - import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -73,6 +51,28 @@ import javax.xml.stream.XMLStreamReader; import MCview.Atom; import MCview.PDBChain; +import jalview.analysis.AlignSeq; +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.api.DBRefEntryI; +import jalview.api.SiftsClientI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceI; +import jalview.io.BackupFiles; +import jalview.io.StructureFile; +import jalview.schemes.ResidueProperties; +import jalview.structure.StructureMapping; +import jalview.util.Comparison; +import jalview.util.DBRefUtils; +import jalview.util.Format; +import jalview.xml.binding.sifts.Entry; +import jalview.xml.binding.sifts.Entry.Entity; +import jalview.xml.binding.sifts.Entry.Entity.Segment; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; +import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.ResidueDetail; public class SiftsClient implements SiftsClientI { @@ -92,14 +92,24 @@ public class SiftsClient implements SiftsClientI private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT; + /** + * PDB sequence position to sequence coordinate mapping as derived from SIFTS + * record for the identified SeqCoordSys Used for lift-over from sequence + * derived from PDB (with first extracted PDBRESNUM as 'start' to the sequence + * being annotated with PDB data + */ + private jalview.datamodel.Mapping seqFromPdbMapping; + private static final int BUFFER_SIZE = 4096; - public static final int UNASSIGNED = -1; + public static final int UNASSIGNED = Integer.MIN_VALUE; private static final int PDB_RES_POS = 0; private static final int PDB_ATOM_POS = 1; + private static final int PDBE_POS = 2; + private static final String NOT_OBSERVED = "Not_Observed"; private static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; @@ -113,6 +123,7 @@ public class SiftsClient implements SiftsClientI private enum CoordinateSys { UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe"); + private String name; private CoordinateSys(String name) @@ -130,6 +141,7 @@ public class SiftsClient implements SiftsClientI { NAME_SEC_STRUCTURE("nameSecondaryStructure"), CODE_SEC_STRUCTURE("codeSecondaryStructure"), ANNOTATION("Annotation"); + private String code; private ResidueDetailType(String code) @@ -215,7 +227,7 @@ public class SiftsClient implements SiftsClientI SiftsSettings.getCacheThresholdInDays())) { File oldSiftsFile = new File(siftsFileName + "_old"); - siftsFile.renameTo(oldSiftsFile); + BackupFiles.moveFileToFile(siftsFile, oldSiftsFile); try { siftsFile = downloadSiftsFile(pdbId.toLowerCase()); @@ -224,7 +236,7 @@ public class SiftsClient implements SiftsClientI } catch (IOException e) { e.printStackTrace(); - oldSiftsFile.renameTo(siftsFile); + BackupFiles.moveFileToFile(oldSiftsFile, siftsFile); return new File(siftsFileName); } } @@ -413,6 +425,11 @@ public class SiftsClient implements SiftsClientI public StructureMapping getSiftsStructureMapping(SequenceI seq, String pdbFile, String chain) throws SiftsException { + SequenceI aseq = seq; + while (seq.getDatasetSequence() != null) + { + seq = seq.getDatasetSequence(); + } structId = (chain == null) ? pdbId : pdbId + "|" + chain; System.out.println("Getting SIFTS mapping for " + structId + ": seq " + seq.getName()); @@ -435,8 +452,9 @@ public class SiftsClient implements SiftsClientI HashMap mapping = getGreedyMapping(chain, seq, ps); String mappingOutput = mappingDetails.toString(); - StructureMapping siftsMapping = new StructureMapping(seq, pdbFile, - pdbId, chain, mapping, mappingOutput); + StructureMapping siftsMapping = new StructureMapping(aseq, pdbFile, + pdbId, chain, mapping, mappingOutput, seqFromPdbMapping); + return siftsMapping; } @@ -444,8 +462,8 @@ public class SiftsClient implements SiftsClientI public HashMap getGreedyMapping(String entityId, SequenceI seq, java.io.PrintStream os) throws SiftsException { - List omitNonObserved = new ArrayList(); - int nonObservedShiftIndex = 0; + List omitNonObserved = new ArrayList<>(); + int nonObservedShiftIndex = 0, pdbeNonObserved = 0; // System.out.println("Generating mappings for : " + entityId); Entity entity = null; entity = getEntityById(entityId); @@ -476,7 +494,7 @@ public class SiftsClient implements SiftsClientI TreeMap resNumMap = new TreeMap(); List segments = entity.getSegment(); SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap, - omitNonObserved, nonObservedShiftIndex); + omitNonObserved, nonObservedShiftIndex, pdbeNonObserved); processSegments(segments, shp); try { @@ -498,15 +516,66 @@ public class SiftsClient implements SiftsClientI { throw new SiftsException("SIFTS mapping failed"); } + // also construct a mapping object between the seq-coord sys and the PDB + // seq's coord sys Integer[] keys = mapping.keySet().toArray(new Integer[0]); Arrays.sort(keys); seqStart = keys[0]; seqEnd = keys[keys.length - 1]; - + List from = new ArrayList<>(), to = new ArrayList<>(); + int[] _cfrom = null, _cto = null; String matchedSeq = originalSeq; - if (seqStart != UNASSIGNED) + if (seqStart != UNASSIGNED) // fixme! seqStart can map to -1 for a pdb + // sequence that starts <-1 { + for (int seqps : keys) + { + int pdbpos = mapping.get(seqps)[PDBE_POS]; + if (pdbpos == UNASSIGNED) + { + // not correct - pdbpos might be -1, but leave it for now + continue; + } + if (_cfrom == null || seqps != _cfrom[1] + 1) + { + _cfrom = new int[] { seqps, seqps }; + from.add(_cfrom); + _cto = null; // discontinuity + } + else + { + _cfrom[1] = seqps; + } + if (_cto == null || pdbpos != 1 + _cto[1]) + { + _cto = new int[] { pdbpos, pdbpos }; + to.add(_cto); + } + else + { + _cto[1] = pdbpos; + } + } + _cfrom = new int[from.size() * 2]; + _cto = new int[to.size() * 2]; + int p = 0; + for (int[] range : from) + { + _cfrom[p++] = range[0]; + _cfrom[p++] = range[1]; + } + ; + p = 0; + for (int[] range : to) + { + _cto[p++] = range[0]; + _cto[p++] = range[1]; + } + ; + + seqFromPdbMapping = new jalview.datamodel.Mapping(null, _cto, _cfrom, + 1, 1); pdbStart = mapping.get(seqStart)[PDB_RES_POS]; pdbEnd = mapping.get(seqEnd)[PDB_RES_POS]; int orignalSeqStart = seq.getStart(); @@ -559,6 +628,8 @@ public class SiftsClient implements SiftsClientI TreeMap resNumMap = shp.getResNumMap(); List omitNonObserved = shp.getOmitNonObserved(); int nonObservedShiftIndex = shp.getNonObservedShiftIndex(); + int pdbeNonObservedCount = shp.getPdbeNonObserved(); + int firstPDBResNum = UNASSIGNED; for (Segment segment : segments) { // System.out.println("Mapping segments : " + segment.getSegId() + "\\"s @@ -566,6 +637,9 @@ public class SiftsClient implements SiftsClientI List residues = segment.getListResidue().getResidue(); for (Residue residue : residues) { + boolean isObserved = isResidueObserved(residue); + int pdbeIndex = getLeadingIntegerValue(residue.getDbResNum(), + UNASSIGNED); int currSeqIndex = UNASSIGNED; List cRefDbs = residue.getCrossRefDb(); CrossRefDb pdbRefDb = null; @@ -574,6 +648,19 @@ public class SiftsClient implements SiftsClientI if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB)) { pdbRefDb = cRefDb; + if (firstPDBResNum == UNASSIGNED) + { + firstPDBResNum = getLeadingIntegerValue(cRefDb.getDbResNum(), + UNASSIGNED); + } + else + { + if (isObserved) + { + // after we find the first observed residue we just increment + firstPDBResNum++; + } + } } if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName()) && isAccessionMatched(cRefDb.getDbAccessionId())) @@ -586,11 +673,45 @@ public class SiftsClient implements SiftsClientI } } } + if (!isObserved) + { + ++pdbeNonObservedCount; + } + if (seqCoordSys == seqCoordSys.PDB) // FIXME: is seqCoordSys ever PDBe + // ??? + { + // if the sequence has a primary reference to the PDB, then we are + // dealing with a sequence extracted directly from the PDB. In that + // case, numbering is PDBe - non-observed residues + currSeqIndex = seq.getStart() - 1 + pdbeIndex; + } + if (!isObserved) + { + if (seqCoordSys != CoordinateSys.UNIPROT) // FIXME: PDB or PDBe only + // here + { + // mapping to PDB or PDBe so we need to bookkeep for the + // non-observed + // SEQRES positions + omitNonObserved.add(currSeqIndex); + ++nonObservedShiftIndex; + } + } if (currSeqIndex == UNASSIGNED) { + // change in logic - unobserved residues with no currSeqIndex + // corresponding are still counted in both nonObservedShiftIndex and + // pdbeIndex... continue; } - if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd()) + // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) // + // true + // numbering + // is + // not + // up + // to + // seq.getEnd() { int resNum = (pdbRefDb == null) @@ -599,22 +720,18 @@ public class SiftsClient implements SiftsClientI : getLeadingIntegerValue(pdbRefDb.getDbResNum(), UNASSIGNED); - if (isResidueObserved(residue) - || seqCoordSys == CoordinateSys.UNIPROT) + if (isObserved) { char resCharCode = ResidueProperties .getSingleCharacterCode(ResidueProperties .getCanonicalAminoAcid(residue.getDbResName())); resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); + + int[] mappingcols = new int[] { Integer.valueOf(resNum), + UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED }; + + mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols); } - else - { - omitNonObserved.add(currSeqIndex); - ++nonObservedShiftIndex; - } - mapping.put(currSeqIndex - nonObservedShiftIndex, - new int[] - { Integer.valueOf(resNum), UNASSIGNED }); } } } @@ -904,15 +1021,35 @@ public class SiftsClient implements SiftsClientI private int nonObservedShiftIndex; + /** + * count of number of 'not observed' positions in the PDB record's SEQRES + * (total number of residues with coordinates == length(SEQRES) - + * pdbeNonObserved + */ + private int pdbeNonObserved; + public SegmentHelperPojo(SequenceI seq, HashMap mapping, TreeMap resNumMap, - List omitNonObserved, int nonObservedShiftIndex) + List omitNonObserved, int nonObservedShiftIndex, + int pdbeNonObserved) { setSeq(seq); setMapping(mapping); setResNumMap(resNumMap); setOmitNonObserved(omitNonObserved); setNonObservedShiftIndex(nonObservedShiftIndex); + setPdbeNonObserved(pdbeNonObserved); + + } + + public void setPdbeNonObserved(int pdbeNonObserved2) + { + this.pdbeNonObserved = pdbeNonObserved2; + } + + public int getPdbeNonObserved() + { + return pdbeNonObserved; } public SequenceI getSeq() @@ -964,6 +1101,7 @@ public class SiftsClient implements SiftsClientI { this.nonObservedShiftIndex = nonObservedShiftIndex; } + } @Override