X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fws%2Fsifts%2FSiftsClient.java;h=b45d0072ce2c11e63735cb7d5268dcebfd3d0dcd;hb=27555f103dbf71e346da16206f784f14e20af3d0;hp=6fe8597a40ceeb46431a2cd5ad941a9fdf2170e1;hpb=fc7f615a5a4d53b9d71f43dcf02c7a77d9ab41e8;p=jalview.git diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 6fe8597..b45d007 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -71,8 +71,8 @@ import javax.xml.bind.Unmarshaller; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamReader; -import MCview.Atom; -import MCview.PDBChain; +import mc_view.Atom; +import mc_view.PDBChain; public class SiftsClient implements SiftsClientI { @@ -82,29 +82,39 @@ public class SiftsClient implements SiftsClientI */ private static File mockSiftsFile; - private Entry siftsEntry; + private static final int BUFFER_SIZE = 4096; - private StructureFile pdb; + public static final int UNASSIGNED = Integer.MIN_VALUE; - private String pdbId; + private static final int PDB_RES_POS = 0; - private String structId; + private static final int PDB_ATOM_POS = 1; - private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT; + private static final int PDBE_POS = 2; - private static final int BUFFER_SIZE = 4096; + private static final String NOT_OBSERVED = "Not_Observed"; - public static final int UNASSIGNED = -1; + protected static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; - private static final int PDB_RES_POS = 0; + protected final static String NEWLINE = System.lineSeparator(); - private static final int PDB_ATOM_POS = 1; + private Entry siftsEntry; - private static final String NOT_OBSERVED = "Not_Observed"; + private StructureFile pdb; + + private String pdbId; + + private String structId; - private static final String SIFTS_FTP_BASE_URL = "https://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; + private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT; - private final static String NEWLINE = System.lineSeparator(); + /** + * PDB sequence position to sequence coordinate mapping as derived from SIFTS + * record for the identified SeqCoordSys Used for lift-over from sequence + * derived from PDB (with first extracted PDBRESNUM as 'start' to the sequence + * being annotated with PDB data + */ + private jalview.datamodel.Mapping seqFromPdbMapping; private String curSourceDBRef; @@ -390,7 +400,7 @@ public class SiftsClient implements SiftsClientI @Override public HashSet getAllMappingAccession() { - HashSet accessions = new HashSet(); + HashSet accessions = new HashSet<>(); List entities = siftsEntry.getEntity(); for (Entity entity : entities) { @@ -413,6 +423,11 @@ public class SiftsClient implements SiftsClientI public StructureMapping getSiftsStructureMapping(SequenceI seq, String pdbFile, String chain) throws SiftsException { + SequenceI aseq = seq; + while (seq.getDatasetSequence() != null) + { + seq = seq.getDatasetSequence(); + } structId = (chain == null) ? pdbId : pdbId + "|" + chain; System.out.println("Getting SIFTS mapping for " + structId + ": seq " + seq.getName()); @@ -435,8 +450,9 @@ public class SiftsClient implements SiftsClientI HashMap mapping = getGreedyMapping(chain, seq, ps); String mappingOutput = mappingDetails.toString(); - StructureMapping siftsMapping = new StructureMapping(seq, pdbFile, - pdbId, chain, mapping, mappingOutput); + StructureMapping siftsMapping = new StructureMapping(aseq, pdbFile, + pdbId, chain, mapping, mappingOutput, seqFromPdbMapping); + return siftsMapping; } @@ -444,14 +460,14 @@ public class SiftsClient implements SiftsClientI public HashMap getGreedyMapping(String entityId, SequenceI seq, java.io.PrintStream os) throws SiftsException { - List omitNonObserved = new ArrayList(); - int nonObservedShiftIndex = 0; + List omitNonObserved = new ArrayList<>(); + int nonObservedShiftIndex = 0,pdbeNonObserved=0; // System.out.println("Generating mappings for : " + entityId); Entity entity = null; entity = getEntityById(entityId); String originalSeq = AlignSeq.extractGaps( jalview.util.Comparison.GapChars, seq.getSequenceAsString()); - HashMap mapping = new HashMap(); + HashMap mapping = new HashMap<>(); DBRefEntryI sourceDBRef; sourceDBRef = getValidSourceDBRef(seq); // TODO ensure sequence start/end is in the same coordinate system and @@ -463,7 +479,7 @@ public class SiftsClient implements SiftsClientI seqCoordSys = CoordinateSys.PDB; } - HashSet dbRefAccessionIdsString = new HashSet(); + HashSet dbRefAccessionIdsString = new HashSet<>(); for (DBRefEntry dbref : seq.getDBRefs()) { dbRefAccessionIdsString.add(dbref.getAccessionId().toLowerCase()); @@ -473,10 +489,10 @@ public class SiftsClient implements SiftsClientI curDBRefAccessionIdsString = dbRefAccessionIdsString; curSourceDBRef = sourceDBRef.getAccessionId(); - TreeMap resNumMap = new TreeMap(); + TreeMap resNumMap = new TreeMap<>(); List segments = entity.getSegment(); SegmentHelperPojo shp = new SegmentHelperPojo(seq, mapping, resNumMap, - omitNonObserved, nonObservedShiftIndex); + omitNonObserved, nonObservedShiftIndex,pdbeNonObserved); processSegments(segments, shp); try { @@ -498,15 +514,61 @@ public class SiftsClient implements SiftsClientI { throw new SiftsException("SIFTS mapping failed"); } + // also construct a mapping object between the seq-coord sys and the PDB seq's coord sys Integer[] keys = mapping.keySet().toArray(new Integer[0]); Arrays.sort(keys); seqStart = keys[0]; seqEnd = keys[keys.length - 1]; - + List from=new ArrayList<>(),to=new ArrayList<>(); + int[]_cfrom=null,_cto=null; String matchedSeq = originalSeq; - if (seqStart != UNASSIGNED) + if (seqStart != UNASSIGNED) // fixme! seqStart can map to -1 for a pdb sequence that starts <-1 { + for (int seqps:keys) + { + int pdbpos = mapping.get(seqps)[PDBE_POS]; + if (pdbpos == UNASSIGNED) + { + // not correct - pdbpos might be -1, but leave it for now + continue; + } + if (_cfrom==null || seqps!=_cfrom[1]+1) + { + _cfrom = new int[] { seqps,seqps}; + from.add(_cfrom); + _cto = null; // discontinuity + } else { + _cfrom[1]= seqps; + } + if (_cto==null || pdbpos!=1+_cto[1]) + { + _cto = new int[] { pdbpos,pdbpos}; + to.add(_cto); + } else { + _cto[1] = pdbpos; + } + } + _cfrom = new int[from.size() * 2]; + _cto = new int[to.size() * 2]; + int p = 0; + for (int[] range : from) + { + _cfrom[p++] = range[0]; + _cfrom[p++] = range[1]; + } + ; + p = 0; + for (int[] range : to) + { + _cto[p++] = range[0]; + _cto[p++] = range[1]; + } + ; + + seqFromPdbMapping = new jalview.datamodel.Mapping(null, _cto, _cfrom, + 1, + 1); pdbStart = mapping.get(seqStart)[PDB_RES_POS]; pdbEnd = mapping.get(seqEnd)[PDB_RES_POS]; int orignalSeqStart = seq.getStart(); @@ -559,6 +621,8 @@ public class SiftsClient implements SiftsClientI TreeMap resNumMap = shp.getResNumMap(); List omitNonObserved = shp.getOmitNonObserved(); int nonObservedShiftIndex = shp.getNonObservedShiftIndex(); + int pdbeNonObservedCount = shp.getPdbeNonObserved(); + int firstPDBResNum = UNASSIGNED; for (Segment segment : segments) { // System.out.println("Mapping segments : " + segment.getSegId() + "\\"s @@ -566,6 +630,9 @@ public class SiftsClient implements SiftsClientI List residues = segment.getListResidue().getResidue(); for (Residue residue : residues) { + boolean isObserved = isResidueObserved(residue); + int pdbeIndex = getLeadingIntegerValue(residue.getDbResNum(), + UNASSIGNED); int currSeqIndex = UNASSIGNED; List cRefDbs = residue.getCrossRefDb(); CrossRefDb pdbRefDb = null; @@ -574,6 +641,19 @@ public class SiftsClient implements SiftsClientI if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB)) { pdbRefDb = cRefDb; + if (firstPDBResNum == UNASSIGNED) + { + firstPDBResNum = getLeadingIntegerValue(cRefDb.getDbResNum(), + UNASSIGNED); + } + else + { + if (isObserved) + { + // after we find the first observed residue we just increment + firstPDBResNum++; + } + } } if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys.getName()) && isAccessionMatched(cRefDb.getDbAccessionId())) @@ -586,11 +666,45 @@ public class SiftsClient implements SiftsClientI } } } + if (!isObserved) + { + ++pdbeNonObservedCount; // TODO this value is never used + } + if (seqCoordSys == CoordinateSys.PDB) // FIXME: is seqCoordSys ever PDBe + // ??? + { + // if the sequence has a primary reference to the PDB, then we are + // dealing with a sequence extracted directly from the PDB. In that + // case, numbering is PDBe - non-observed residues + currSeqIndex = seq.getStart() - 1 + pdbeIndex; + } + if (!isObserved) + { + if (seqCoordSys != CoordinateSys.UNIPROT) // FIXME: PDB or PDBe only + // here + { + // mapping to PDB or PDBe so we need to bookkeep for the + // non-observed + // SEQRES positions + omitNonObserved.add(currSeqIndex); + ++nonObservedShiftIndex; + } + } if (currSeqIndex == UNASSIGNED) { + // change in logic - unobserved residues with no currSeqIndex + // corresponding are still counted in both nonObservedShiftIndex and + // pdbeIndex... continue; } - if (currSeqIndex >= seq.getStart() && currSeqIndex <= seq.getEnd()) + // if (currSeqIndex >= seq.getStart() && currSeqIndex <= seqlength) // + // true + // numbering + // is + // not + // up + // to + // seq.getEnd() { int resNum = (pdbRefDb == null) @@ -599,22 +713,18 @@ public class SiftsClient implements SiftsClientI : getLeadingIntegerValue(pdbRefDb.getDbResNum(), UNASSIGNED); - if (isResidueObserved(residue) - || seqCoordSys == CoordinateSys.UNIPROT) + if (isObserved) { char resCharCode = ResidueProperties .getSingleCharacterCode(ResidueProperties .getCanonicalAminoAcid(residue.getDbResName())); resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); + + int[] mappingcols = new int[] { Integer.valueOf(resNum), + UNASSIGNED, isObserved ? firstPDBResNum : UNASSIGNED }; + + mapping.put(currSeqIndex - nonObservedShiftIndex, mappingcols); } - else - { - omitNonObserved.add(currSeqIndex); - ++nonObservedShiftIndex; - } - mapping.put(currSeqIndex - nonObservedShiftIndex, - new int[] - { Integer.valueOf(resNum), UNASSIGNED }); } } } @@ -714,7 +824,7 @@ public class SiftsClient implements SiftsClientI */ private boolean isResidueObserved(Residue residue) { - Set annotations = getResidueAnnotaitons(residue, + Set annotations = getResidueAnnotations(residue, ResidueDetailType.ANNOTATION); if (annotations == null || annotations.isEmpty()) { @@ -737,10 +847,10 @@ public class SiftsClient implements SiftsClientI * @param type * @return */ - private Set getResidueAnnotaitons(Residue residue, + private Set getResidueAnnotations(Residue residue, ResidueDetailType type) { - HashSet foundAnnotations = new HashSet(); + HashSet foundAnnotations = new HashSet<>(); List resDetails = residue.getResidueDetail(); for (ResidueDetail resDetail : resDetails) { @@ -885,6 +995,10 @@ public class SiftsClient implements SiftsClientI public int resCount; + protected SiftsEntitySortPojo() + { + } + @Override public int compareTo(SiftsEntitySortPojo o) { @@ -904,15 +1018,35 @@ public class SiftsClient implements SiftsClientI private int nonObservedShiftIndex; + /** + * count of number of 'not observed' positions in the PDB record's SEQRES + * (total number of residues with coordinates == length(SEQRES) - + * pdbeNonObserved + */ + private int pdbeNonObserved; + public SegmentHelperPojo(SequenceI seq, HashMap mapping, TreeMap resNumMap, - List omitNonObserved, int nonObservedShiftIndex) + List omitNonObserved, int nonObservedShiftIndex, + int pdbeNonObserved) { setSeq(seq); setMapping(mapping); setResNumMap(resNumMap); setOmitNonObserved(omitNonObserved); setNonObservedShiftIndex(nonObservedShiftIndex); + setPdbeNonObserved(pdbeNonObserved); + + } + + public void setPdbeNonObserved(int pdbeNonObserved2) + { + this.pdbeNonObserved = pdbeNonObserved2; + } + + public int getPdbeNonObserved() + { + return pdbeNonObserved; } public SequenceI getSeq() @@ -964,6 +1098,7 @@ public class SiftsClient implements SiftsClientI { this.nonObservedShiftIndex = nonObservedShiftIndex; } + } @Override