import jalview.api.DBRefEntryI;
import jalview.api.SiftsClientI;
import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
import jalview.datamodel.SequenceI;
import jalview.schemes.ResidueProperties;
import jalview.structure.StructureMapping;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
import java.util.HashSet;
-import java.util.LinkedHashMap;
import java.util.List;
+import java.util.TreeMap;
import java.util.zip.GZIPInputStream;
import javax.xml.bind.JAXBContext;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
+import MCview.Atom;
+import MCview.PDBChain;
+import MCview.PDBfile;
+
public class SiftsClient implements SiftsClientI
{
private Entry siftsEntry;
+ private PDBfile pdb;
+
private String pdbId;
private String structId;
private String segStartEnd;
+ /** Coordinate-system label for UniProt numbering; the initial value of seqCoordSys. */
+ private static final String UNIPROT_COORDINATE_SYS = "UniProt";
+
+ /** Coordinate-system label selected when the sequence's source DBRef is a PDB reference. */
+ private static final String PDB_COORDINATE_SYS = "PDBresnum";
+
+ /** Coordinate system compared (ignoring case) with each cross-reference's getDbCoordSys(). */
+ private String seqCoordSys = UNIPROT_COORDINATE_SYS;
+
private static final int BUFFER_SIZE = 4096;
+ /** Marker stored in a mapping cell, or returned from a lookup, when no value was assigned. */
+ public static final int UNASSIGNED = -1;
+
+ /** Column of a mapping row that receives the residue number read from the SIFTS residue. */
+ private static final int PDB_RES_POS = 0;
+
+ /** Column of a mapping row that receives the atom index returned by getAtomIndex. */
+ private static final int PDB_ATOM_POS = 1;
+
private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/";
public static final String DEFAULT_SIFTS_DOWNLOAD_DIR = System
*
* @param pdbId
*/
- public SiftsClient(String pdbId)
+ public SiftsClient(PDBfile pdb)
{
- this.pdbId = pdbId;
+ this.pdb = pdb;
+ this.pdbId = pdb.id;
try
{
File siftsFile = getSiftsFile(pdbId);
* @param pdbId
* @param siftsFile
*/
- public SiftsClient(String pdbId, File siftsFile)
+ public SiftsClient(PDBfile pdb, File siftsFile)
{
- this.pdbId = pdbId;
+ this.pdb = pdb;
+ this.pdbId = pdb.id;
try
{
siftsEntry = parseSIFTs(siftsFile);
+ ".xml.gz");
if (siftsFile.exists())
{
- // TODO it may be worth performing a timestamp age check to determine if a
+ // TODO it may be worth performing an age check to determine if a
// new SIFTs file should be re-downloaded as SIFTs entries are usually
// updated weekly
System.out.println(">>> SIFTS File already downloaded for " + pdbId);
{
return entry != null && entry.getAccessionId() != null
&& isFoundInSiftsEntry(entry.getAccessionId());
- // & entry.getStartRes() > 0;
}
@Override
int[][] mapping = getGreedyMapping(chain, seq, ps);
String mappingOutput = mappingDetails.toString();
- return new StructureMapping(seq, pdbFile, pdbId, chain, mapping,
+ StructureMapping siftsMapping = new StructureMapping(seq, pdbFile,
+ pdbId, chain, mapping,
mappingOutput);
+ return siftsMapping;
}
@Override
java.io.PrintStream os)
throws SiftsException
{
- int matchedResStart = -1;
- int matchedResEnd = -1;
- int counter = 0;
- int pdbStart = -1;
- int pdbEnd = -1;
- int sStart = -1;
- int sEnd = -1;
- boolean startDetected = false;
System.out.println("Generating mappings for : " + entityId);
Entity entity = null;
entity = getEntityById(entityId);
- String seqStr = AlignSeq.extractGaps(jalview.util.Comparison.GapChars,
+ String originalSeq = AlignSeq.extractGaps(
+ jalview.util.Comparison.GapChars,
seq.getSequenceAsString());
- int mapping[][] = new int[seqStr.length() + seq.getStart()][2];
+ int mapping[][] = new int[originalSeq.length() + seq.getStart()][2];
DBRefEntryI sourceDBRef = seq.getSourceDBRef();
if (sourceDBRef == null)
{
sourceDBRef = getValidSourceDBRef(seq);
- // TODO update sequence start/end with sourceDBRef start/end
- // seq.setStart(sourceDBRef.getStartRes());
- // seq.setEnd(sourceDBRef.getEndRes());
+ // TODO ensure sequence start/end is in the same coordinate system and
+ // consistent with the chosen sourceDBRef
+ }
+
+ // set sequence coordinate system - default value is UniProt
+ if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
+ {
+ seqCoordSys = PDB_COORDINATE_SYS;
}
- String crossRefAccessionId = sourceDBRef.getAccessionId();
- int count = 0;
- for (int residue[] : mapping)
+ ArrayList<String> dbRefAccessionIdsString = new ArrayList<String>();
+ for (DBRefEntry dbref : seq.getDBRefs())
{
- residue[1] = count++;
- residue[0] = -1;
+ dbRefAccessionIdsString.add(dbref.getAccessionId());
+ }
+
+ // initialise all mapping positions to unassigned
+ for (int residuePos[] : mapping)
+ {
+ residuePos[PDB_RES_POS] = UNASSIGNED;
+ residuePos[PDB_ATOM_POS] = UNASSIGNED;
}
- LinkedHashMap<Integer, String> resNumMap = new LinkedHashMap<Integer, String>();
+ TreeMap<Integer, String> resNumMap = new TreeMap<Integer, String>();
List<Segment> segments = entity.getSegment();
for (Segment segment : segments)
{
List<Residue> residues = segment.getListResidue().getResidue();
for (Residue residue : residues)
{
- int refDbResNum = -1;
+ int currSeqIndex = UNASSIGNED;
List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
for (CrossRefDb cRefDb : cRefDbs)
{
- if (cRefDb.getDbAccessionId().equalsIgnoreCase(
- crossRefAccessionId))
+ if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys)
+ && dbRefAccessionIdsString.contains(cRefDb
+ .getDbAccessionId()))
{
- refDbResNum = Integer.valueOf(cRefDb.getDbResNum());
+ String resNumIndexString = cRefDb.getDbResNum()
+ .equalsIgnoreCase("None") ? String.valueOf(UNASSIGNED)
+ : cRefDb.getDbResNum();
+ currSeqIndex = Integer.valueOf(resNumIndexString);
+ break;
}
}
- if (refDbResNum == -1)
+ if (currSeqIndex == UNASSIGNED)
{
continue;
}
- int loopCount = 0;
- for (int[] x : mapping)
+ if (currSeqIndex > seq.getStart() && currSeqIndex <= seq.getEnd())
{
- if (loopCount > seq.getStart() && x[1] == refDbResNum)
- {
- int resNum = Integer.valueOf(residue.getDbResNum());
- x[0] = resNum;
- char resCharCode = ResidueProperties
- .getSingleCharacterCode(residue.getDbResName());
- resNumMap.put(resNum, String.valueOf(resCharCode));
- }
- ++loopCount;
+ int resNum = Integer.valueOf(residue.getDbResNum());
+ mapping[currSeqIndex][PDB_RES_POS] = Integer.valueOf(resNum);
+ char resCharCode = ResidueProperties
+ .getSingleCharacterCode(residue.getDbResName());
+ resNumMap.put(currSeqIndex, String.valueOf(resCharCode));
}
}
}
-
+ try
+ {
+ populateAtomPositions(entityId, mapping);
+ } catch (Exception e)
+ {
+ e.printStackTrace();
+ }
+ padWithGaps(resNumMap);
+ int counter = 0;
+ int seqStart = 0;
+ int seqEnd = 0;
+ int pdbStart = 0;
+ int pdbEnd = 0;
+ boolean startDetected = false;
for (int[] x : mapping)
{
- if (!startDetected && x[0] > -1)
+ if (!startDetected && x[PDB_RES_POS] != UNASSIGNED)
{
- matchedResStart = counter;
- // System.out.println(matchedResStart);
+ seqStart = counter;
startDetected = true;
+ // System.out.println("Seq start: "+ seqStart);
}
- if (startDetected && x[0] == -1)
+ if (startDetected && x[PDB_RES_POS] != UNASSIGNED)
{
- matchedResEnd = counter;
+ seqEnd = counter;
}
++counter;
}
- String matchedSeqStr = seqStr;
- if (matchedResStart != -1)
+ String matchedSeq = originalSeq;
+ if (seqStart != UNASSIGNED)
{
- matchedResEnd = (matchedResEnd == -1) ? counter : matchedResEnd;
- pdbStart = mapping[matchedResStart][0];
- pdbEnd = mapping[matchedResEnd - 1][0];
- sStart = mapping[matchedResStart][1];
- sEnd = mapping[matchedResEnd - 1][1];
- int seqStart = seq.getStart();
- if (seqStart > 1)
- {
- matchedResStart = matchedResStart - seqStart;
- matchedResEnd = matchedResEnd - seqStart;
- }
- else
+ seqEnd = (seqEnd == UNASSIGNED) ? counter : seqEnd;
+ pdbStart = mapping[seqStart][PDB_RES_POS];
+ pdbEnd = mapping[seqEnd][PDB_RES_POS];
+ int orignalSeqStart = seq.getStart();
+ if (orignalSeqStart >= 1)
{
- --matchedResStart;
- --matchedResEnd;
+ int subSeqStart = seqStart - orignalSeqStart;
+ int subSeqEnd = seqEnd - (orignalSeqStart - 1);
+ matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd);
}
- matchedSeqStr = seqStr.substring(matchedResStart, matchedResEnd);
}
StringBuilder targetStrucSeqs = new StringBuilder();
if (os != null)
{
MappingOutputPojo mop = new MappingOutputPojo();
- mop.setSeqStart(sStart);
- mop.setSeqEnd(sEnd);
+ mop.setSeqStart(seqStart);
+ mop.setSeqEnd(seqEnd);
mop.setSeqName(seq.getName());
- mop.setSeqResidue(matchedSeqStr);
+ mop.setSeqResidue(matchedSeq);
mop.setStrStart(pdbStart);
mop.setStrEnd(pdbEnd);
&& getAllMappingAccession().contains(accessionId);
}
+  /**
+   * Fills every position missing between the smallest and the largest key of
+   * the supplied map with the gap symbol "-", so that the map covers a
+   * contiguous range of positions. Existing entries are never overwritten. A
+   * null or empty map is left untouched.
+   *
+   * @param resNumMap
+   *          sorted map of position to residue symbol; modified in place
+   */
+  void padWithGaps(TreeMap<Integer, String> resNumMap)
+  {
+    // An empty map has no first/last key, so there is no range to pad.
+    if (resNumMap == null || resNumMap.isEmpty())
+    {
+      return;
+    }
+    // A TreeMap keeps its keys ordered, so the range bounds can be read
+    // directly instead of copying and sorting the key set.
+    int firstIndex = resNumMap.firstKey();
+    int lastIndex = resNumMap.lastKey();
+    System.out.println("Min value " + firstIndex);
+    System.out.println("Max value " + lastIndex);
+    for (int x = firstIndex; x <= lastIndex; x++)
+    {
+      if (!resNumMap.containsKey(x))
+      {
+        resNumMap.put(x, "-");
+      }
+    }
+  }
+
+  /**
+   * Looks up the chain with the given id in the PDB file held by this client
+   * and, for every mapping row whose residue column is assigned, stores the
+   * value returned by getAtomIndex for that residue number in the row's atom
+   * column. Rows whose residue column is UNASSIGNED are left unchanged.
+   *
+   * @param chainId
+   *          id of the chain whose atoms are searched
+   * @param mapping
+   *          rows of {residue number, atom index}; updated in place
+   * @throws IllegalArgumentException
+   *           if the chain lookup returns null or mapping is null
+   */
+  void populateAtomPositions(String chainId, int[][] mapping)
+          throws IllegalArgumentException
+  {
+    PDBChain targetChain = pdb.findChain(chainId);
+    boolean chainMissing = (targetChain == null);
+    if (chainMissing || mapping == null)
+    {
+      throw new IllegalArgumentException(
+              "Chain id or mapping must not be null.");
+    }
+    for (int row = 0; row < mapping.length; row++)
+    {
+      int[] positions = mapping[row];
+      int residueNumber = positions[PDB_RES_POS];
+      if (residueNumber == UNASSIGNED)
+      {
+        // no residue mapped at this position, so there is no atom to find
+        continue;
+      }
+      positions[PDB_ATOM_POS] = getAtomIndex(residueNumber,
+              targetChain.atoms);
+    }
+  }
+
+  /**
+   * Scans the given atoms in iteration order and reports the atomIndex of the
+   * first atom whose resNumber equals the requested residue index.
+   *
+   * @param residueIndex
+   *          residue number to search for
+   * @param atoms
+   *          atoms to search; must not be null
+   * @return the atomIndex of the first matching atom, or UNASSIGNED when no
+   *         atom matches
+   * @throws IllegalArgumentException
+   *           if atoms is null
+   */
+  int getAtomIndex(int residueIndex, Collection<Atom> atoms)
+  {
+    if (atoms == null)
+    {
+      throw new IllegalArgumentException(
+              "atoms collection must not be null!");
+    }
+    int foundIndex = UNASSIGNED;
+    for (Atom candidate : atoms)
+    {
+      if (candidate.resNumber != residueIndex)
+      {
+        continue;
+      }
+      // first match wins; later atoms of the same residue are ignored
+      foundIndex = candidate.atomIndex;
+      break;
+    }
+    return foundIndex;
+  }
+
@Override
public Entity getEntityById(String id) throws SiftsException
{
@Override
public StringBuffer getMappingOutput(MappingOutputPojo mp)
+ throws SiftsException
{
String seqRes = mp.getSeqResidue();
String seqName = mp.getSeqName();
// output mappings
StringBuffer output = new StringBuffer();
output.append(NEWLINE);
- output.append("Sequence ⟷ Structure mapping details:");
+ output.append("Sequence ⟷ Structure mapping details").append(NEWLINE);
+ output.append("Method: SIFTS");
output.append(NEWLINE).append(NEWLINE);
output.append(new Format("%" + maxid + "s").form(seqName));
output.append(String.valueOf(pdbEnd));
output.append(NEWLINE).append(NEWLINE);
- float pid = 0;
+ int matchedSeqCount = 0;
for (int j = 0; j < nochunks; j++)
{
// Print the first aligned sequence
// Print out the matching chars
for (int i = 0; i < len; i++)
{
+ try
+ {
if ((i + (j * len)) < seqRes.length())
{
if (seqRes.charAt(i + (j * len)) == strRes.charAt(i + (j * len))
&& !jalview.util.Comparison.isGap(seqRes.charAt(i
+ (j * len))))
{
- pid++;
+ matchedSeqCount++;
output.append("|");
}
else if (type.equals("pep"))
output.append(" ");
}
}
+ } catch (IndexOutOfBoundsException e)
+ {
+ continue;
+ }
}
// Now print the second aligned sequence
output = output.append(NEWLINE);
}
output.append(NEWLINE).append(NEWLINE);
}
- pid = pid / (seqRes.length()) * 100;
+ float pid = (float) matchedSeqCount / seqRes.length() * 100;
output.append("Length of alignment = " + seqRes.length())
.append(NEWLINE);
output.append(new Format("Percentage ID = %2.2f").form(pid));
output.append(NEWLINE);
- output.append("Mapping method: SIFTS").append(NEWLINE);
return output;
}
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
+import MCview.PDBfile;
+
public class SiftsClientTest
{
private final ByteArrayOutputStream outContent = new ByteArrayOutputStream();
+ "AYKVTLVTPTGNVEFQCPDDVYILDAAEEEGIDLPYSCRAGSCSSCAGKLKTGSLNQDD"
+ "QSFLDDDQIDEGWVLTCAAYPVSDVTIETHKEEELTA.", 1, 147);
- int[][] expectedMapping = { { -1, 0 }, { -1, 1 }, { -1, 2 }, { -1, 3 },
- { -1, 4 }, { -1, 5 }, { -1, 6 }, { -1, 7 }, { -1, 8 }, { -1, 9 },
- { -1, 10 }, { -1, 11 }, { -1, 12 }, { -1, 13 }, { -1, 14 },
- { -1, 15 }, { -1, 16 }, { -1, 17 }, { -1, 18 }, { -1, 19 },
- { -1, 20 }, { -1, 21 }, { -1, 22 }, { -1, 23 }, { -1, 24 },
- { -1, 25 }, { -1, 26 }, { -1, 27 }, { -1, 28 }, { -1, 29 },
- { -1, 30 }, { -1, 31 }, { -1, 32 }, { -1, 33 }, { -1, 34 },
- { -1, 35 }, { -1, 36 }, { -1, 37 }, { -1, 38 }, { -1, 39 },
- { -1, 40 }, { -1, 41 }, { -1, 42 }, { -1, 43 }, { -1, 44 },
- { -1, 45 }, { -1, 46 }, { -1, 47 }, { -1, 48 }, { -1, 49 },
- { -1, 50 }, { 1, 51 }, { 2, 52 }, { 3, 53 }, { 4, 54 }, { 5, 55 },
- { 6, 56 }, { 7, 57 }, { 8, 58 }, { 9, 59 }, { 10, 60 }, { 11, 61 },
- { 12, 62 }, { 13, 63 }, { 14, 64 }, { 15, 65 }, { 16, 66 },
- { 17, 67 }, { 18, 68 }, { 19, 69 }, { 20, 70 }, { 21, 71 },
- { 22, 72 }, { 23, 73 }, { 24, 74 }, { 25, 75 }, { 26, 76 },
- { 27, 77 }, { 28, 78 }, { 29, 79 }, { 30, 80 }, { 31, 81 },
- { 32, 82 }, { 33, 83 }, { 34, 84 }, { 35, 85 }, { 36, 86 },
- { 37, 87 }, { 38, 88 }, { 39, 89 }, { 40, 90 }, { 41, 91 },
- { 42, 92 }, { 43, 93 }, { 44, 94 }, { 45, 95 }, { 46, 96 },
- { 47, 97 }, { 48, 98 }, { 49, 99 }, { 50, 100 }, { 51, 101 },
- { 52, 102 }, { 53, 103 }, { 54, 104 }, { 55, 105 }, { 56, 106 },
- { 57, 107 }, { 58, 108 }, { 59, 109 }, { 60, 110 }, { 61, 111 },
- { 62, 112 }, { 63, 113 }, { 64, 114 }, { 65, 115 }, { 66, 116 },
- { 67, 117 }, { 68, 118 }, { 69, 119 }, { 70, 120 }, { 71, 121 },
- { 72, 122 }, { 73, 123 }, { 74, 124 }, { 75, 125 }, { 76, 126 },
- { 77, 127 }, { 78, 128 }, { 79, 129 }, { 80, 130 }, { 81, 131 },
- { 82, 132 }, { 83, 133 }, { 84, 134 }, { 85, 135 }, { 86, 136 },
- { 87, 137 }, { 88, 138 }, { 89, 139 }, { 90, 140 }, { 91, 141 },
- { 92, 142 }, { 93, 143 }, { 94, 144 }, { 95, 145 }, { 96, 146 },
- { 97, 147 } };
+ int u = SiftsClient.UNASSIGNED;
+
+ int[][] expectedMapping = { { u, u }, { u, u }, { u, u }, { u, u },
+ { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u },
+ { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u },
+ { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u },
+ { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u },
+ { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u },
+ { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { u, u },
+ { u, u }, { u, u }, { u, u }, { u, u }, { u, u }, { 1, u }, { 2, u },
+ { 3, u }, { 4, u }, { 5, u }, { 6, u }, { 7, u }, { 8, u }, { 9, u },
+ { 10, u }, { 11, u }, { 12, u }, { 13, u }, { 14, u }, { 15, u },
+ { 16, u }, { 17, u }, { 18, u }, { 19, u }, { 20, u }, { 21, u },
+ { 22, u }, { 23, u }, { 24, u }, { 25, u }, { 26, u }, { 27, u },
+ { 28, u }, { 29, u }, { 30, u }, { 31, u }, { 32, u }, { 33, u },
+ { 34, u }, { 35, u }, { 36, u }, { 37, u }, { 38, u }, { 39, u },
+ { 40, u }, { 41, u }, { 42, u }, { 43, u }, { 44, u }, { 45, u },
+ { 46, u }, { 47, u }, { 48, u }, { 49, u }, { 50, u }, { 51, u },
+ { 52, u }, { 53, u }, { 54, u }, { 55, u }, { 56, u }, { 57, u },
+ { 58, u }, { 59, u }, { 60, u }, { 61, u }, { 62, u }, { 63, u },
+ { 64, u }, { 65, u }, { 66, u }, { 67, u }, { 68, u }, { 69, u },
+ { 70, u }, { 71, u }, { 72, u }, { 73, u }, { 74, u }, { 75, u },
+ { 76, u }, { 77, u }, { 78, u }, { 79, u }, { 80, u }, { 81, u },
+ { 82, u }, { 83, u }, { 84, u }, { 85, u }, { 86, u }, { 87, u },
+ { 88, u }, { 89, u }, { 90, u }, { 91, u }, { 92, u }, { 93, u },
+ { 94, u }, { 95, u }, { 96, u }, { 97, u } };
@BeforeTest(alwaysRun = true)
public void setUpSiftsClient()
// test reproducibility
File testSiftsFile = new File("test/jalview/io/" + testPDBId
+ ".xml.gz");
- siftsClient = new SiftsClient(testPDBId, testSiftsFile);
+ PDBfile pdbFile = new PDBfile(false, false, false);
+ siftsClient = new SiftsClient(pdbFile, testSiftsFile);
}
@AfterTest(alwaysRun = true)
}
@Test(groups = { "Functional" })
+  private void getAtomIndexTest()
+  {
+    // An empty atom collection cannot contain the requested residue, so the
+    // lookup must report the position as unassigned.
+    int atomIndex = siftsClient.getAtomIndex(1,
+            new java.util.ArrayList<MCview.Atom>());
+    org.testng.Assert.assertEquals(atomIndex, SiftsClient.UNASSIGNED);
+  }
+
+ @Test(
+ groups = { "Functional" },
+ expectedExceptions = IllegalArgumentException.class)
+ private void getAtomIndexNullTest()
+ {
+ // A null atom collection must be rejected: the test passes only if this
+ // call throws the IllegalArgumentException declared in the annotation.
+ siftsClient.getAtomIndex(1, null);
+ }
+
+  @Test(groups = { "Functional" })
+  private void padWithGapsTest()
+  {
+    // Positions 2 and 4 are deliberately absent so that they must be filled.
+    java.util.TreeMap<Integer, String> resNumMap =
+            new java.util.TreeMap<Integer, String>();
+    resNumMap.put(1, "A");
+    resNumMap.put(3, "C");
+    resNumMap.put(5, "E");
+
+    siftsClient.padWithGaps(resNumMap);
+
+    // The range 1..5 is now contiguous, gaps mark the formerly missing keys
+    // and the original entries are untouched.
+    org.testng.Assert.assertEquals(resNumMap.size(), 5);
+    org.testng.Assert.assertEquals(resNumMap.get(1), "A");
+    org.testng.Assert.assertEquals(resNumMap.get(2), "-");
+    org.testng.Assert.assertEquals(resNumMap.get(3), "C");
+    org.testng.Assert.assertEquals(resNumMap.get(4), "-");
+    org.testng.Assert.assertEquals(resNumMap.get(5), "E");
+  }
+
+ @Test(groups = { "Functional" })
+ private void populateAtomPositionsTest()
+ {
+ // TODO: placeholder - no assertions yet, so this test always passes.
+ }
+
+ @Test(groups = { "Functional" })
public void getValidSourceDBRefTest()
{