From: tcofoegbu Date: Mon, 16 Nov 2015 17:17:27 +0000 (+0000) Subject: JAL-1967 JAL-1479 refactored sequence<->structure mapping implementation X-Git-Tag: Release_2_10_0~338 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=4ff572f7cf17f5ca9c23f82bbe41005af8af55cc;p=jalview.git JAL-1967 JAL-1479 refactored sequence<->structure mapping implementation --- diff --git a/src/jalview/api/SiftsClientI.java b/src/jalview/api/SiftsClientI.java index 35cb57f..e2fac14 100644 --- a/src/jalview/api/SiftsClientI.java +++ b/src/jalview/api/SiftsClientI.java @@ -26,6 +26,7 @@ import jalview.ws.sifts.MappingOutputPojo; import jalview.ws.sifts.SiftsException; import jalview.xml.binding.sifts.Entry.Entity; +import java.util.HashMap; import java.util.HashSet; public interface SiftsClientI @@ -95,7 +96,7 @@ public interface SiftsClientI * @param accessionId * @return */ - public boolean isFoundInSiftsEntry(String accessionId); + public boolean isAccessionMatched(String accessionId); /** * Get the standard DB referenced by the SIFTs Entry @@ -138,6 +139,7 @@ public interface SiftsClientI * @return generated mapping * @throws Exception */ - public int[][] getGreedyMapping(String entityId, SequenceI seq, + public HashMap getGreedyMapping(String entityId, + SequenceI seq, java.io.PrintStream os) throws SiftsException; } \ No newline at end of file diff --git a/src/jalview/datamodel/AlignmentAnnotation.java b/src/jalview/datamodel/AlignmentAnnotation.java index 9bd1f2e..7990a5c 100755 --- a/src/jalview/datamodel/AlignmentAnnotation.java +++ b/src/jalview/datamodel/AlignmentAnnotation.java @@ -245,6 +245,7 @@ public class AlignmentAnnotation * * @see java.lang.Object#finalize() */ + @Override protected void finalize() throws Throwable { sequenceRef = null; @@ -1304,7 +1305,8 @@ public class AlignmentAnnotation * @note caller should add the remapped annotation to newref if they have not * already */ - public void remap(SequenceI newref, int[][] mapping, int from, int to, + public void remap(SequenceI newref, HashMap mapping, + int from, int to, int idxoffset) { if (mapping != null) @@ -1312,7 +1314,7 @@ public class AlignmentAnnotation Map old = sequenceMapping; Map remap = new HashMap(); int index = -1; - for (int mp[] : mapping) + for (int mp[] : mapping.values()) { if (index++ < 0) { diff --git a/src/jalview/structure/StructureMapping.java b/src/jalview/structure/StructureMapping.java index a62f1ae..3fcb5e9 100644 --- a/src/jalview/structure/StructureMapping.java +++ b/src/jalview/structure/StructureMapping.java @@ -23,6 +23,8 @@ package jalview.structure; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.SequenceI; +import java.util.HashMap; + public class StructureMapping { String mappingDetails; @@ -35,11 +37,19 @@ public class StructureMapping String pdbchain; - // Mapping index 0 is resNum, index 1 is atomNo - int[][] mapping; + public static final int UNASSIGNED_VALUE = -1; + + private static final int PDB_RES_NUM_INDEX = 0; + + private static final int PDB_ATOM_NUM_INDEX = 1; + + // Mapping key is residue index while value is an array containing PDB resNum, + // and atomNo + HashMap mapping; public StructureMapping(SequenceI seq, String pdbfile, String pdbid, - String chain, int[][] mapping, String mappingDetails) + String chain, HashMap mapping, + String mappingDetails) { sequence = seq; this.pdbfile = pdbfile; @@ -71,13 +81,14 @@ public class StructureMapping */ public int getAtomNum(int seqpos) { - if (mapping.length > seqpos) + int[] resNumAtomMap = mapping.get(seqpos); + if (resNumAtomMap != null) { - return mapping[seqpos][1]; + return resNumAtomMap[PDB_ATOM_NUM_INDEX]; } else { - return 0; + return UNASSIGNED_VALUE; } } @@ -88,13 +99,14 @@ public class StructureMapping */ public int getPDBResNum(int seqpos) { - if (mapping.length > seqpos) + int[] resNumAtomMap = mapping.get(seqpos); + if (resNumAtomMap != null) { - return mapping[seqpos][0]; + return resNumAtomMap[PDB_RES_NUM_INDEX]; } else { - return 0; + return UNASSIGNED_VALUE; } } @@ -105,14 +117,14 @@ public class StructureMapping */ public int getSeqPos(int pdbResNum) { - for (int i = 0; i < mapping.length; i++) + for (Integer seqPos : mapping.keySet()) { - if (mapping[i][0] == pdbResNum) + if (pdbResNum == getPDBResNum(seqPos)) { - return i; + return seqPos; } } - return -1; + return UNASSIGNED_VALUE; } /** diff --git a/src/jalview/structure/StructureSelectionManager.java b/src/jalview/structure/StructureSelectionManager.java index 1541a6a..e9053ed 100644 --- a/src/jalview/structure/StructureSelectionManager.java +++ b/src/jalview/structure/StructureSelectionManager.java @@ -588,9 +588,7 @@ public class StructureSelectionManager .getMappingFromS1(false); maxChain.transferRESNUMFeatures(seq, null); - // allocate enough slots to store the mapping from positions in - // sequence[s] to the associated chain - int[][] mapping = new int[seq.findPosition(seq.getLength()) + 2][2]; + HashMap mapping = new HashMap(); int resNum = -10000; int index = 0; @@ -604,8 +602,8 @@ public class StructureSelectionManager { // TODO (JAL-1836) address root cause: negative residue no in PDB // file - mapping[tmp.alignmentMapping + 1][0] = tmp.resNumber; - mapping[tmp.alignmentMapping + 1][1] = tmp.atomIndex; + mapping.put(tmp.alignmentMapping + 1, new int[] { tmp.resNumber, + tmp.atomIndex }); } } diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 245d38f..106a66e 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -50,6 +50,7 @@ import java.net.URLConnection; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.TreeMap; @@ -89,6 +90,10 @@ public class SiftsClient implements SiftsClientI private static final int PDB_ATOM_POS = 1; + private static final String NOT_FOUND = "Not_Found"; + + private static final String NOT_OBSERVED = "Not_Observed"; + private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; public static final String DEFAULT_SIFTS_DOWNLOAD_DIR = System @@ -139,7 +144,7 @@ public class SiftsClient implements SiftsClientI }; /** - * Fetch SIFTs file for the given PDB Id and construct an instance of + * Fetch SIFTs file for the given PDBfile and construct an instance of * SiftsClient * * @param pdbId @@ -154,8 +159,8 @@ public class SiftsClient implements SiftsClientI } /** - * Construct an instance of SiftsClient using the supplied SIFTs file - the - * SIFTs file should correspond to the given PDB Id + * Construct an instance of SiftsClient using the supplied SIFTs file. Note: + * The SIFTs file should correspond to the PDB Id in PDBfile instance * * @param pdbId * @param siftsFile @@ -214,7 +219,8 @@ public class SiftsClient implements SiftsClientI } /** - * Get a SIFTs XML file for a given PDB Id + * Get a SIFTs XML file for a given PDB Id from Cache or download from FTP + * repository if not found in cache * * @param pdbId * @return SIFTs XML file @@ -237,7 +243,7 @@ public class SiftsClient implements SiftsClientI } /** - * Download a SIFTs XML file for a given PDB Id + * Download a SIFTs XML file for a given PDB Id from an FTP repository * * @param pdbId * @return downloaded SIFTs XML file @@ -354,8 +360,8 @@ public class SiftsClient implements SiftsClientI /** - * Check that the DBRef Entry is properly populated and is available in the - * instantiated SIFTs Entry + * Check that the DBRef Entry is properly populated and is available in this + * SiftClient instance * * @param entry * - DBRefEntry to validate @@ -411,7 +417,7 @@ public class SiftsClient implements SiftsClientI mappingDetails.append(NEWLINE); } }; - int[][] mapping = getGreedyMapping(chain, seq, ps); + HashMap mapping = getGreedyMapping(chain, seq, ps); String mappingOutput = mappingDetails.toString(); StructureMapping siftsMapping = new StructureMapping(seq, pdbFile, @@ -421,17 +427,19 @@ public class SiftsClient implements SiftsClientI } @Override - public int[][] getGreedyMapping(String entityId, SequenceI seq, + public HashMap getGreedyMapping(String entityId, SequenceI seq, java.io.PrintStream os) throws SiftsException { + ArrayList omitNonObserved = new ArrayList(); + int nonObservedShiftIndex = 0; System.out.println("Generating mappings for : " + entityId); Entity entity = null; entity = getEntityById(entityId); String originalSeq = AlignSeq.extractGaps( jalview.util.Comparison.GapChars, seq.getSequenceAsString()); - int mapping[][] = new int[originalSeq.length() + seq.getStart()][2]; + HashMap mapping = new HashMap(); DBRefEntryI sourceDBRef = seq.getSourceDBRef(); if (sourceDBRef == null) { @@ -456,12 +464,6 @@ public class SiftsClient implements SiftsClientI curDBRefAccessionIdsString = dbRefAccessionIdsString; curSourceDBRef = sourceDBRef.getAccessionId(); - // initialise all mapping positions to unassigned - for (int residuePos[] : mapping) - { - residuePos[PDB_RES_POS] = UNASSIGNED; - residuePos[PDB_ATOM_POS] = UNASSIGNED; - } TreeMap resNumMap = new TreeMap(); List segments = entity.getSegment(); for (Segment segment : segments) @@ -483,7 +485,7 @@ public class SiftsClient implements SiftsClientI } if (cRefDb.getDbCoordSys() .equalsIgnoreCase(seqCoordSys.getName()) - && hasAccessionId(cRefDb.getDbAccessionId())) + && isAccessionMatched(cRefDb.getDbAccessionId())) { String resNumIndexString = cRefDb.getDbResNum() .equalsIgnoreCase("None") ? String.valueOf(UNASSIGNED) @@ -512,17 +514,21 @@ public class SiftsClient implements SiftsClientI .getDbResNum()) : Integer.valueOf(pdbRefDb .getDbResNum().split("[a-zA-Z]")[0]); } - try + + if (isResidueObserved(residue) + || seqCoordSys == CoordinateSys.UNIPROT) { - mapping[currSeqIndex][PDB_RES_POS] = Integer - .valueOf(resNum); - } catch (ArrayIndexOutOfBoundsException e) + char resCharCode = ResidueProperties + .getSingleCharacterCode(residue.getDbResName()); + resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); + } + else { - // do nothing.. + omitNonObserved.add(currSeqIndex); + ++nonObservedShiftIndex; } - char resCharCode = ResidueProperties - .getSingleCharacterCode(residue.getDbResName()); - resNumMap.put(currSeqIndex, String.valueOf(resCharCode)); + mapping.put(currSeqIndex - nonObservedShiftIndex, new int[] { + Integer.valueOf(resNum), UNASSIGNED }); } } } @@ -533,40 +539,29 @@ public class SiftsClient implements SiftsClientI { e.printStackTrace(); } - padWithGaps(resNumMap); - int counter = 0; + padWithGaps(resNumMap, omitNonObserved); int seqStart = UNASSIGNED; int seqEnd = UNASSIGNED; int pdbStart = UNASSIGNED; int pdbEnd = UNASSIGNED; - boolean startDetected = false; - for (int[] x : mapping) - { - if (!startDetected && x[PDB_RES_POS] != UNASSIGNED) - { - seqStart = counter; - startDetected = true; - // System.out.println("Seq start: "+ seqStart); - } - if (startDetected && x[PDB_RES_POS] != UNASSIGNED) - { - seqEnd = counter; - } - ++counter; - } + Integer[] keys = mapping.keySet().toArray(new Integer[0]); + Arrays.sort(keys); + seqStart = keys[0]; + seqEnd = keys[keys.length - 1]; String matchedSeq = originalSeq; if (seqStart != UNASSIGNED) { - seqEnd = (seqEnd == UNASSIGNED) ? counter : seqEnd; - pdbStart = mapping[seqStart][PDB_RES_POS]; - pdbEnd = mapping[seqEnd][PDB_RES_POS]; + pdbStart = mapping.get(seqStart)[PDB_RES_POS]; + pdbEnd = mapping.get(seqEnd)[PDB_RES_POS]; int orignalSeqStart = seq.getStart(); if (orignalSeqStart >= 1) { int subSeqStart = seqStart - orignalSeqStart; int subSeqEnd = seqEnd - (orignalSeqStart - 1); + subSeqEnd = originalSeq.length() < subSeqEnd ? originalSeq.length() + : subSeqEnd; matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd); } } @@ -596,6 +591,12 @@ public class SiftsClient implements SiftsClientI return mapping; } + /** + * Checks if the residue instance is marked 'Not_observed' or not + * + * @param residue + * @return + */ private boolean isResidueObserved(Residue residue) { String annotation = getResidueAnnotaiton(residue, @@ -604,14 +605,21 @@ public class SiftsClient implements SiftsClientI { return true; } - if (!annotation.equalsIgnoreCase("Not_Found") - && annotation.equalsIgnoreCase("Not_Observed")) + if (!annotation.equalsIgnoreCase(NOT_FOUND) + && annotation.equalsIgnoreCase(NOT_OBSERVED)) { return false; } return true; } + /** + * Get annotation String for a given residue and annotation type + * + * @param residue + * @param type + * @return + */ private String getResidueAnnotaiton(Residue residue, ResidueDetailType type) { @@ -623,29 +631,30 @@ public class SiftsClient implements SiftsClientI return resDetail.getContent(); } } - return "Not_Found"; + return NOT_FOUND; } - private boolean hasAccessionId(String accession) + @Override + public boolean isAccessionMatched(String accession) { boolean isStrictMatch = true; return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession) : curDBRefAccessionIdsString.contains(accession.toLowerCase()); } - @Override - public boolean isFoundInSiftsEntry(String accessionId) + private boolean isFoundInSiftsEntry(String accessionId) { return accessionId != null && getAllMappingAccession().contains(accessionId); } /** - * Pads missing positions with gaps + * Pad omitted residue positions in PDB sequence with gaps * * @param resNumMap */ - void padWithGaps(TreeMap resNumMap) + void padWithGaps(TreeMap resNumMap, + ArrayList omitNonObserved) { if (resNumMap == null || resNumMap.isEmpty()) { @@ -659,13 +668,14 @@ public class SiftsClient implements SiftsClientI System.out.println("Max value " + lastIndex); for (int x = firstIndex; x <= lastIndex; x++) { - if (!resNumMap.containsKey(x)) + if (!resNumMap.containsKey(x) && !omitNonObserved.contains(x)) { resNumMap.put(x, "-"); } } } + /** * * @param chainId @@ -675,7 +685,7 @@ public class SiftsClient implements SiftsClientI * @throws IllegalArgumentException * Thrown if chainId or mapping is null */ - void populateAtomPositions(String chainId, int[][] mapping) + void populateAtomPositions(String chainId, HashMap mapping) throws IllegalArgumentException { PDBChain chain = pdb.findChain(chainId); @@ -684,7 +694,7 @@ public class SiftsClient implements SiftsClientI throw new IllegalArgumentException( "Chain id or mapping must not be null."); } - for (int[] map : mapping) + for (int[] map : mapping.values()) { if (map[PDB_RES_POS] != UNASSIGNED) { diff --git a/test/jalview/ws/sifts/SiftsClientTest.java b/test/jalview/ws/sifts/SiftsClientTest.java index 4a57a88..24c6751 100644 --- a/test/jalview/ws/sifts/SiftsClientTest.java +++ b/test/jalview/ws/sifts/SiftsClientTest.java @@ -27,6 +27,7 @@ import jalview.datamodel.SequenceI; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.PrintStream; +import java.util.HashMap; import org.testng.Assert; import org.testng.FileAssert; @@ -52,32 +53,18 @@ public class SiftsClientTest int u = SiftsClient.UNASSIGNED; - int[][] expectedMapping = { { u, u }, { u, u }, { u, u }, { u, u }, - { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, - { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, - { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, - { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, - { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, - { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, - { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { 1, u }, { 2, u }, - { 3, u }, { 4, u }, { 5, u }, { 6, u }, { 7, u }, { 8, u }, { 9, u }, - { 10, u }, { 11, u }, { 12, u }, { 13, u }, { 14, u }, { 15, u }, - { 16, u }, { 17, u }, { 18, u }, { 19, u }, { 20, u }, { 21, u }, - { 22, u }, { 23, u }, { 24, u }, { 25, u }, { 26, u }, { 27, u }, - { 28, u }, { 29, u }, { 30, u }, { 31, u }, { 32, u }, { 33, u }, - { 34, u }, { 35, u }, { 36, u }, { 37, u }, { 38, u }, { 39, u }, - { 40, u }, { 41, u }, { 42, u }, { 43, u }, { 44, u }, { 45, u }, - { 46, u }, { 47, u }, { 48, u }, { 49, u }, { 50, u }, { 51, u }, - { 52, u }, { 53, u }, { 54, u }, { 55, u }, { 56, u }, { 57, u }, - { 58, u }, { 59, u }, { 60, u }, { 61, u }, { 62, u }, { 63, u }, - { 64, u }, { 65, u }, { 66, u }, { 67, u }, { 68, u }, { 69, u }, - { 70, u }, { 71, u }, { 72, u }, { 73, u }, { 74, u }, { 75, u }, - { 76, u }, { 77, u }, { 78, u }, { 79, u }, { 80, u }, { 81, u }, - { 82, u }, { 83, u }, { 84, u }, { 85, u }, { 86, u }, { 87, u }, - { 88, u }, { 89, u }, { 90, u }, { 91, u }, { 92, u }, { 93, u }, - { 94, u }, { 95, u }, { 96, u }, { 97, u } }; + HashMap expectedMapping = new HashMap(); @BeforeTest(alwaysRun = true) + public void populateExpectedMapping() throws SiftsException + { + for (int x = 1; x <= 97; x++) + { + expectedMapping.put(50 + x, new int[] { x, u }); + } + } + + @BeforeTest(alwaysRun = true) public void setUpSiftsClient() throws SiftsException { // SIFTs entries are updated weekly - so use saved SIFTs file to enforce @@ -155,7 +142,8 @@ public class SiftsClientTest try { - int[][] actualMapping = siftsClient.getGreedyMapping("A", testSeq, + HashMap actualMapping = siftsClient.getGreedyMapping( + "A", testSeq, null); Assert.assertEquals(actualMapping, expectedMapping); Assert.assertEquals(testSeq.getStart(), 1);