From 19b5db52eb1285a22f8225f4baa914c2d7b15edd Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 1 May 2017 13:51:13 +0100 Subject: [PATCH] JAL-2434 omit unmapped positions from mapping --- src/jalview/structure/StructureMapping.java | 59 ++++++------ src/jalview/structure/StructureMappingClient.java | 50 ++++++++--- src/jalview/ws/phyre2/Phyre2Client.java | 30 +++---- src/jalview/ws/sifts/SiftsClient.java | 8 +- test/jalview/ws/phyre2/Phyre2ClientTest.java | 100 ++------------------- test/jalview/ws/sifts/SiftsClientTest.java | 2 +- 6 files changed, 94 insertions(+), 155 deletions(-) diff --git a/src/jalview/structure/StructureMapping.java b/src/jalview/structure/StructureMapping.java index d6eb1eb..b8a0fc6 100644 --- a/src/jalview/structure/StructureMapping.java +++ b/src/jalview/structure/StructureMapping.java @@ -26,12 +26,19 @@ import jalview.datamodel.SequenceI; import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.Map.Entry; public class StructureMapping { + public static final int UNASSIGNED = -1; + + public static final int PDB_RES_NUM_INDEX = 0; + + public static final int PDB_ATOM_NUM_INDEX = 1; + /** - * Space character constant, recommended for consistent representation when no - * chain specified + * Space character constant, for consistent representation when no chain + * specified */ public static String NO_CHAIN = " "; @@ -45,12 +52,6 @@ public class StructureMapping String pdbchain; - public static final int UNASSIGNED_VALUE = -1; - - private static final int PDB_RES_NUM_INDEX = 0; - - private static final int PDB_ATOM_NUM_INDEX = 1; - // Mapping key is residue index while value is an array containing PDB resNum, // and atomNo HashMap mapping; @@ -95,39 +96,31 @@ public class StructureMapping } /** + * Answers the structure atom number mapped to the given sequence position, or + * -1 if no mapping * * @param seqpos - * @return 0 or corresponding atom number for the sequence position + * @return */ public int getAtomNum(int seqpos) { int[] resNumAtomMap = mapping.get(seqpos); - if (resNumAtomMap != null) - { - return resNumAtomMap[PDB_ATOM_NUM_INDEX]; - } - else - { - return UNASSIGNED_VALUE; - } + return (resNumAtomMap == null ? UNASSIGNED + : resNumAtomMap[PDB_ATOM_NUM_INDEX]); } /** + * Answers the structure residue number mapped to the given sequence position, + * or -1 if no mapping * * @param seqpos - * @return 0 or the corresponding residue number for the sequence position + * @return */ public int getPDBResNum(int seqpos) { int[] resNumAtomMap = mapping.get(seqpos); - if (resNumAtomMap != null) - { - return resNumAtomMap[PDB_RES_NUM_INDEX]; - } - else - { - return UNASSIGNED_VALUE; - } + return (resNumAtomMap == null ? UNASSIGNED + : resNumAtomMap[PDB_RES_NUM_INDEX]); } /** @@ -147,7 +140,7 @@ public class StructureMapping for (int i = fromSeqPos; i <= toSeqPos; i++) { int resNo = getPDBResNum(i); - if (resNo == UNASSIGNED_VALUE) + if (resNo == UNASSIGNED) { continue; // no mapping from this sequence position } @@ -195,20 +188,22 @@ public class StructureMapping } /** + * Answers the sequence position mapped to the given structure residue number, + * or -1 if no mapping is found * * @param pdbResNum - * @return -1 or the corresponding sequence position for a pdb residue number + * @return */ public int getSeqPos(int pdbResNum) { - for (Integer seqPos : mapping.keySet()) + for (Entry map : mapping.entrySet()) { - if (pdbResNum == getPDBResNum(seqPos)) + if (pdbResNum == map.getValue()[PDB_RES_NUM_INDEX]) { - return seqPos; + return map.getKey(); } } - return UNASSIGNED_VALUE; + return UNASSIGNED; } /** diff --git a/src/jalview/structure/StructureMappingClient.java b/src/jalview/structure/StructureMappingClient.java index 1e78c3c..bcc4d46 100644 --- a/src/jalview/structure/StructureMappingClient.java +++ b/src/jalview/structure/StructureMappingClient.java @@ -4,26 +4,36 @@ import jalview.datamodel.SequenceI; import jalview.io.StructureFile; import jalview.structures.models.MappingOutputModel; +import java.util.ArrayList; import java.util.Collection; +import java.util.List; import java.util.Map; +import java.util.Map.Entry; import MCview.Atom; import MCview.PDBChain; public abstract class StructureMappingClient { - protected StructureFile structureFile; - public static final int UNASSIGNED = -1; - - private static final int PDB_RES_POS = 0; + private static final int PDB_RES_POS = StructureMapping.PDB_RES_NUM_INDEX; // 0 - private static final int PDB_ATOM_POS = 1; + private static final int PDB_ATOM_POS = StructureMapping.PDB_ATOM_NUM_INDEX; // 1 + /** + * Populates the atom positions mapped to by finding the atom number (if any) + * for each structure residue number in the map. If no atom is found (as is + * the case for residues missing in a PDB file), then deletes the residue from + * the map. + * + * @param chainId + * @param mapping + * @throws IllegalArgumentException + * @throws StructureMappingException + */ public void populateAtomPositions(String chainId, - Map mapping) - throws IllegalArgumentException, + Map mapping) throws IllegalArgumentException, StructureMappingException { try @@ -35,13 +45,31 @@ public abstract class StructureMappingClient throw new IllegalArgumentException( "Chain id or mapping must not be null."); } - for (int[] map : mapping.values()) + + List notFound = new ArrayList(); + for (Entry map : mapping.entrySet()) { - if (map[PDB_RES_POS] != UNASSIGNED) + int structureResNo = map.getValue()[PDB_RES_POS]; + + /* + * find the atom number in the chain for this residue number + * NB only CA or P atoms have been saved in the chain - see + * JmolParser.convertSignificantAtoms + */ + int atomIndex = getAtomIndex(structureResNo, chain.atoms); + if (atomIndex == StructureMapping.UNASSIGNED) + { + notFound.add(map.getKey()); + } + else { - map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms); + map.getValue()[PDB_ATOM_POS] = atomIndex; } } + for (Integer missing : notFound) + { + mapping.remove(missing); + } } catch (Exception e) { throw new StructureMappingException( @@ -71,7 +99,7 @@ public abstract class StructureMappingClient return atom.atomIndex; } } - return UNASSIGNED; + return StructureMapping.UNASSIGNED; } public class StructureMappingException extends Exception diff --git a/src/jalview/ws/phyre2/Phyre2Client.java b/src/jalview/ws/phyre2/Phyre2Client.java index e4f67bb..80dc841 100644 --- a/src/jalview/ws/phyre2/Phyre2Client.java +++ b/src/jalview/ws/phyre2/Phyre2Client.java @@ -16,7 +16,6 @@ import jalview.util.Comparison; import jalview.util.Format; import java.io.BufferedReader; -import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.PrintStream; @@ -31,10 +30,6 @@ public class Phyre2Client extends StructureMappingClient { private final static String NEWLINE = System.lineSeparator(); - public static final int UNASSIGNED = -1; - - private final static String PATH_SEPARATOR = File.separator; - private String fastaMappingFile; public Phyre2Client(StructureFile structureFile) @@ -105,24 +100,25 @@ public class Phyre2Client extends StructureMappingClient tStructureRes.setEnd(structureFile.getSeqsAsArray()[0].getEnd()); try { - int sequenceResLenght = tSequenceRes.getLength(); - int structureResLenght = tStructureRes.getLength(); - if (sequenceResLenght == structureResLenght) + int sequenceResLength = tSequenceRes.getLength(); + int structureResLength = tStructureRes.getLength(); + if (sequenceResLength == structureResLength) { int prevStructResNum = -1; - int alignmentLenght = sequenceResLenght + tSequenceRes.getStart(); - for (int x = 0; x < alignmentLenght; x++) + int alignmentLength = sequenceResLength + tSequenceRes.getStart(); + for (int x = 0; x < alignmentLength; x++) { int alignSeqResidueIndex = tSequenceRes.findIndex(x); int structResNum = tStructureRes .findPosition(alignSeqResidueIndex); int sequenceResNum = tSequenceRes .findPosition(alignSeqResidueIndex - 1); - boolean sameResNum = (structResNum == prevStructResNum); - // System.out.println(sequenceResNum + " : " - // + (sameResNum ? -1 : prevStructResNum)); - mapping.put(sequenceResNum, new int[] { - sameResNum ? -1 : prevStructResNum, -1 }); + if (structResNum != prevStructResNum) + { + // System.out.println(sequenceResNum + " : " + prevStructResNum); + mapping.put(sequenceResNum, new int[] { prevStructResNum, + StructureMapping.UNASSIGNED }); + } prevStructResNum = structResNum; } } @@ -131,6 +127,10 @@ public class Phyre2Client extends StructureMappingClient e.printStackTrace(); } + /* + * now populate atom positions for structure residues (and remove + * residue if atom position cannot be found) + */ try { populateAtomPositions(" ", mapping); diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 1328260..30ac0ae 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -79,6 +79,10 @@ public class SiftsClient extends StructureMappingClient implements */ private static File mockSiftsFile; + private static final int UNASSIGNED = StructureMapping.UNASSIGNED; // -1 + + private static final int PDB_RES_POS = StructureMapping.PDB_RES_NUM_INDEX; // 0 + private Entry siftsEntry; private String pdbId; @@ -89,10 +93,6 @@ public class SiftsClient extends StructureMappingClient implements private static final int BUFFER_SIZE = 4096; - public static final int UNASSIGNED = -1; - - private static final int PDB_RES_POS = 0; - private static final String NOT_OBSERVED = "Not_Observed"; private static final String SIFTS_FTP_BASE_URL = "http://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; diff --git a/test/jalview/ws/phyre2/Phyre2ClientTest.java b/test/jalview/ws/phyre2/Phyre2ClientTest.java index 1e9cf70..394ea69 100644 --- a/test/jalview/ws/phyre2/Phyre2ClientTest.java +++ b/test/jalview/ws/phyre2/Phyre2ClientTest.java @@ -33,55 +33,7 @@ public class Phyre2ClientTest + "EEGWVLTCVAYPQSDVTIETHKEAELVG", 1, 144); HashMap expectedMapping = new HashMap(); - - expectedMapping.put(1, new int[] { -1, -1 }); - expectedMapping.put(2, new int[] { -1, -1 }); - expectedMapping.put(3, new int[] { -1, -1 }); - expectedMapping.put(4, new int[] { -1, -1 }); - expectedMapping.put(5, new int[] { -1, -1 }); - expectedMapping.put(6, new int[] { -1, -1 }); - expectedMapping.put(7, new int[] { -1, -1 }); - expectedMapping.put(8, new int[] { -1, -1 }); - expectedMapping.put(9, new int[] { -1, -1 }); - expectedMapping.put(10, new int[] { -1, -1 }); - expectedMapping.put(11, new int[] { -1, -1 }); - expectedMapping.put(12, new int[] { -1, -1 }); - expectedMapping.put(13, new int[] { -1, -1 }); - expectedMapping.put(14, new int[] { -1, -1 }); - expectedMapping.put(15, new int[] { -1, -1 }); - expectedMapping.put(16, new int[] { -1, -1 }); - expectedMapping.put(17, new int[] { -1, -1 }); - expectedMapping.put(18, new int[] { -1, -1 }); - expectedMapping.put(19, new int[] { -1, -1 }); - expectedMapping.put(20, new int[] { -1, -1 }); - expectedMapping.put(21, new int[] { -1, -1 }); - expectedMapping.put(22, new int[] { -1, -1 }); - expectedMapping.put(23, new int[] { -1, -1 }); - expectedMapping.put(24, new int[] { -1, -1 }); - expectedMapping.put(25, new int[] { -1, -1 }); - expectedMapping.put(26, new int[] { -1, -1 }); - expectedMapping.put(27, new int[] { -1, -1 }); - expectedMapping.put(28, new int[] { -1, -1 }); - expectedMapping.put(29, new int[] { -1, -1 }); - expectedMapping.put(30, new int[] { -1, -1 }); - expectedMapping.put(31, new int[] { -1, -1 }); - expectedMapping.put(32, new int[] { -1, -1 }); - expectedMapping.put(33, new int[] { -1, -1 }); - expectedMapping.put(34, new int[] { -1, -1 }); - expectedMapping.put(35, new int[] { -1, -1 }); - expectedMapping.put(36, new int[] { -1, -1 }); - expectedMapping.put(37, new int[] { -1, -1 }); - expectedMapping.put(38, new int[] { -1, -1 }); - expectedMapping.put(39, new int[] { -1, -1 }); - expectedMapping.put(40, new int[] { -1, -1 }); - expectedMapping.put(41, new int[] { -1, -1 }); - expectedMapping.put(42, new int[] { -1, -1 }); - expectedMapping.put(43, new int[] { -1, -1 }); - expectedMapping.put(44, new int[] { -1, -1 }); - expectedMapping.put(45, new int[] { -1, -1 }); - expectedMapping.put(46, new int[] { -1, -1 }); - expectedMapping.put(47, new int[] { -1, -1 }); - // forty eight gaps in PDB sequence + // PDB sequence starts with residue 48 expectedMapping.put(48, new int[] { 48, 1 }); expectedMapping.put(49, new int[] { 49, 6 }); expectedMapping.put(50, new int[] { 50, 12 }); @@ -94,11 +46,8 @@ public class Phyre2ClientTest expectedMapping.put(57, new int[] { 57, 72 }); expectedMapping.put(58, new int[] { 58, 79 }); expectedMapping.put(59, new int[] { 59, 87 }); - expectedMapping.put(60, new int[] { -1, -1 }); - expectedMapping.put(61, new int[] { -1, -1 }); - // two gaps in PDB sequence - expectedMapping.put(62, new int[] { 60, -1 }); - expectedMapping.put(63, new int[] { 61, -1 }); + // residues 60, 61 absent in PDB file + // residues 62, 63 also skipped in map (is this right?) expectedMapping.put(64, new int[] { 62, 91 }); expectedMapping.put(65, new int[] { 63, 100 }); expectedMapping.put(66, new int[] { 64, 111 }); @@ -151,11 +100,10 @@ public class Phyre2ClientTest expectedMapping.put(113, new int[] { 111, 445 }); expectedMapping.put(114, new int[] { 112, 453 }); expectedMapping.put(115, new int[] { 113, 461 }); - // one gap in PDB sequence - expectedMapping.put(116, new int[] { -1, -1 }); + // residue 116 absent in PDB file expectedMapping.put(117, new int[] { 114, 469 }); expectedMapping.put(118, new int[] { 115, 477 }); - expectedMapping.put(119, new int[] { 116, -1 }); + // residue 119 gets removed as mapped to 116 - is this right? expectedMapping.put(120, new int[] { 117, 486 }); expectedMapping.put(121, new int[] { 118, 495 }); expectedMapping.put(122, new int[] { 119, 504 }); @@ -180,7 +128,7 @@ public class Phyre2ClientTest expectedMapping.put(141, new int[] { 138, 652 }); expectedMapping.put(142, new int[] { 139, 661 }); expectedMapping.put(143, new int[] { 140, 670 }); - expectedMapping.put(144, new int[] { -1, -1 }); + // residue 144 absent in PDB file StructureFile structureFile; try @@ -222,40 +170,7 @@ public class Phyre2ClientTest 13, 139); HashMap expectedMapping = new HashMap(); - - expectedMapping.put(13, new int[] { -1, -1 }); - expectedMapping.put(14, new int[] { -1, -1 }); - expectedMapping.put(15, new int[] { -1, -1 }); - expectedMapping.put(16, new int[] { -1, -1 }); - expectedMapping.put(17, new int[] { -1, -1 }); - expectedMapping.put(18, new int[] { -1, -1 }); - expectedMapping.put(19, new int[] { -1, -1 }); - expectedMapping.put(20, new int[] { -1, -1 }); - expectedMapping.put(21, new int[] { -1, -1 }); - expectedMapping.put(22, new int[] { -1, -1 }); - expectedMapping.put(23, new int[] { -1, -1 }); - expectedMapping.put(24, new int[] { -1, -1 }); - expectedMapping.put(25, new int[] { -1, -1 }); - expectedMapping.put(26, new int[] { -1, -1 }); - expectedMapping.put(27, new int[] { -1, -1 }); - expectedMapping.put(28, new int[] { -1, -1 }); - expectedMapping.put(29, new int[] { -1, -1 }); - expectedMapping.put(30, new int[] { -1, -1 }); - expectedMapping.put(31, new int[] { -1, -1 }); - expectedMapping.put(32, new int[] { -1, -1 }); - expectedMapping.put(33, new int[] { -1, -1 }); - expectedMapping.put(34, new int[] { -1, -1 }); - expectedMapping.put(35, new int[] { -1, -1 }); - expectedMapping.put(36, new int[] { -1, -1 }); - expectedMapping.put(37, new int[] { -1, -1 }); - expectedMapping.put(38, new int[] { -1, -1 }); - expectedMapping.put(39, new int[] { -1, -1 }); - expectedMapping.put(40, new int[] { -1, -1 }); - expectedMapping.put(41, new int[] { -1, -1 }); - expectedMapping.put(42, new int[] { -1, -1 }); - expectedMapping.put(43, new int[] { -1, -1 }); - expectedMapping.put(44, new int[] { -1, -1 }); - // thirty two gaps in PDB sequence + // PDB sequence starts with residue 33 expectedMapping.put(45, new int[] { 33, 1 }); expectedMapping.put(46, new int[] { 34, 6 }); expectedMapping.put(47, new int[] { 35, 13 }); @@ -372,6 +287,7 @@ public class Phyre2ClientTest Assert.assertEquals(testSeq.getStart(), 13); Assert.assertEquals(testSeq.getEnd(), 139); + Assert.assertEquals(actualMapping, expectedMapping); testMappings(actualMapping, expectedMapping); } catch (Exception e) { diff --git a/test/jalview/ws/sifts/SiftsClientTest.java b/test/jalview/ws/sifts/SiftsClientTest.java index fc59241..496b6dc 100644 --- a/test/jalview/ws/sifts/SiftsClientTest.java +++ b/test/jalview/ws/sifts/SiftsClientTest.java @@ -79,7 +79,7 @@ public class SiftsClientTest + "AYKVTLVTPTGNVEFQCPDDVYILDAAEEEGIDLPYSCRAGSCSSCAGKLKTGSLNQDD" + "QSFLDDDQIDEGWVLTCAAYPVSDVTIETHKEEELTA.", 1, 147); - int u = SiftsClient.UNASSIGNED; + int u = StructureMapping.UNASSIGNED; HashMap expectedMapping = new HashMap(); -- 1.7.10.2