/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 * 
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.ws.sifts;

import jalview.analysis.AlignSeq;
import jalview.api.DBRefEntryI;
import jalview.api.SiftsClientI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.SequenceI;
import jalview.schemes.ResidueProperties;
import jalview.structure.StructureMapping;
import jalview.util.Format;
import jalview.xml.binding.sifts.Entry;
import jalview.xml.binding.sifts.Entry.Entity;
import jalview.xml.binding.sifts.Entry.Entity.Segment;
import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion;
import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue;
import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb;
import jalview.xml.binding.sifts.Entry.ListDB.Db;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.TreeMap;
import java.util.zip.GZIPInputStream;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import MCview.Atom;
import MCview.PDBChain;
import MCview.PDBfile;
69 public class SiftsClient implements SiftsClientI
71 private Entry siftsEntry;
77 private String structId;
79 private String segStartEnd;
81 private CoordinateSys seqCoordSys = CoordinateSys.UNIPROT;
83 private static final int BUFFER_SIZE = 4096;
85 public static final int UNASSIGNED = -1;
87 private static final int PDB_RES_POS = 0;
89 private static final int PDB_ATOM_POS = 1;
91 private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/";
93 public static final String DEFAULT_SIFTS_DOWNLOAD_DIR = System
94 .getProperty("user.home")
96 + ".sifts_downloads" + File.separatorChar;
98 public static final String SIFTS_DOWNLOAD_DIR = jalview.bin.Cache
99 .getDefault("sifts_download_dir", DEFAULT_SIFTS_DOWNLOAD_DIR);
101 private final static String NEWLINE = System.lineSeparator();
103 private String curSourceDBRef;
105 private HashSet<String> curDBRefAccessionIdsString;
/**
 * Coordinate systems in which a residue cross reference may be numbered. The
 * name of each constant is the text compared (case-insensitively) against
 * the coordinate system reported by a SIFTS cross reference.
 */
public enum CoordinateSys
{
  UNIPROT("UniProt"), PDB("PDBresnum"), PDBe("PDBe");

  private final String name;

  private CoordinateSys(String name)
  {
    this.name = name;
  }

  /**
   * @return the coordinate system label associated with this constant
   */
  public String getName()
  {
    return name;
  }
}
124 * Fetch SIFTs file for the given PDB Id and construct an instance of
128 * @throws SiftsException
130 public SiftsClient(PDBfile pdb) throws SiftsException
134 File siftsFile = getSiftsFile(pdbId);
135 siftsEntry = parseSIFTs(siftsFile);
/**
 * Construct an instance of SiftsClient using the supplied SIFTs file - the
 * SIFTs file should correspond to the given PDB Id
 * 
 * @param pdb
 *          - the structure file the SIFTS record describes
 * @param siftsFile
 *          - the GZipped SIFTs XML file to read
 * @throws SiftsException
 *           if the SIFTS file cannot be parsed
 */
public SiftsClient(PDBfile pdb, File siftsFile) throws SiftsException
{
  this.pdb = pdb;
  // NOTE(review): the two assignments above/below were missing from the
  // listing; 'pdb.id' is assumed to be where PDBfile exposes its PDB Id.
  this.pdbId = pdb.id;
  siftsEntry = parseSIFTs(siftsFile);
}
155 * Parse the given SIFTs File and return a JAXB POJO of parsed data
158 * - the GZipped SIFTs XML file to parse
161 * if a problem occurs while parsing the SIFTs XML
163 private Entry parseSIFTs(File siftFile) throws SiftsException
167 System.out.println("File : " + siftFile.getAbsolutePath());
168 JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts");
169 InputStream in = new FileInputStream(siftFile);
170 GZIPInputStream gzis = new GZIPInputStream(in);
171 XMLStreamReader streamReader = XMLInputFactory.newInstance()
172 .createXMLStreamReader(gzis);
173 Unmarshaller um = jc.createUnmarshaller();
174 return (Entry) um.unmarshal(streamReader);
175 } catch (JAXBException e)
178 } catch (FileNotFoundException e)
181 } catch (XMLStreamException e)
184 } catch (FactoryConfigurationError e)
187 } catch (IOException e)
191 throw new SiftsException("Error parsing siftFile");
195 * Get a SIFTs XML file for a given PDB Id
198 * @return SIFTs XML file
200 public static File getSiftsFile(String pdbId)
202 File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase()
204 if (siftsFile.exists())
206 // TODO it may be worth performing an age check to determine if a
207 // new SIFTs file should be re-downloaded as SIFTs entries are usually
209 System.out.println(">>> SIFTS File already downloaded for " + pdbId);
212 siftsFile = downloadSiftsFile(pdbId.toLowerCase());
217 * Download a SIFTs XML file for a given PDB Id
220 * @return downloaded SIFTs XML file
222 public static File downloadSiftsFile(String pdbId)
224 String siftFile = pdbId + ".xml.gz";
225 String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile;
226 String downloadedSiftsFile = SIFTS_DOWNLOAD_DIR + siftFile;
227 File siftsDownloadDir = new File(SIFTS_DOWNLOAD_DIR);
228 if (!siftsDownloadDir.exists())
230 siftsDownloadDir.mkdirs();
234 System.out.println(">> Download ftp url : " + siftsFileFTPURL);
235 URL url = new URL(siftsFileFTPURL);
236 URLConnection conn = url.openConnection();
237 InputStream inputStream = conn.getInputStream();
238 FileOutputStream outputStream = new FileOutputStream(
239 downloadedSiftsFile);
240 byte[] buffer = new byte[BUFFER_SIZE];
242 while ((bytesRead = inputStream.read(buffer)) != -1)
244 outputStream.write(buffer, 0, bytesRead);
246 outputStream.close();
248 System.out.println(">>> File downloaded : " + downloadedSiftsFile);
249 } catch (IOException ex)
251 ex.printStackTrace();
253 return new File(downloadedSiftsFile);
257 * Delete the SIFTs file for the given PDB Id in the local SIFTs download
261 * @return true if the file was deleted or doesn't exist
263 public static boolean deleteSiftsFileByPDBId(String pdbId)
265 File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase()
267 if (siftsFile.exists())
269 return siftsFile.delete();
276 * Get a valid SIFTs DBRef for the given sequence current SIFTs entry
279 * - the target sequence for the operation
280 * @return a valid DBRefEntry that is SIFTs compatible
282 * if no valid source DBRefEntry was found for the given sequences
284 public DBRefEntryI getValidSourceDBRef(SequenceI seq)
285 throws SiftsException
287 DBRefEntryI sourceDBRef = null;
288 sourceDBRef = seq.getSourceDBRef();
289 if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
295 DBRefEntry[] dbRefs = seq.getDBRefs();
296 if (dbRefs == null || dbRefs.length < 1)
298 final SequenceI[] seqs = new SequenceI[] { seq };
299 new jalview.ws.DBRefFetcher(seqs, null, null, null, false)
301 dbRefs = seq.getDBRefs();
304 if (dbRefs == null || dbRefs.length < 1)
306 throw new SiftsException("Could not get source DB Ref");
309 for (DBRefEntryI dbRef : dbRefs)
311 if (dbRef == null || dbRef.getAccessionId() == null
312 || dbRef.getSource() == null)
316 if (isFoundInSiftsEntry(dbRef.getAccessionId())
317 && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef
318 .getSource().equalsIgnoreCase(DBRefSource.PDB)))
324 if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
328 throw new SiftsException("Could not get source DB Ref");
333 * Check that the DBRef Entry is properly populated and is available in the
334 * instantiated SIFTs Entry
337 * - DBRefEntry to validate
338 * @return true validation is successful otherwise false is returned.
340 private boolean isValidDBRefEntry(DBRefEntryI entry)
342 return entry != null && entry.getAccessionId() != null
343 && isFoundInSiftsEntry(entry.getAccessionId());
347 public HashSet<String> getAllMappingAccession()
349 HashSet<String> accessions = new HashSet<String>();
350 List<Entity> entities = siftsEntry.getEntity();
351 for (Entity entity : entities)
353 List<Segment> segments = entity.getSegment();
354 for (Segment segment : segments)
356 List<MapRegion> mapRegions = segment.getListMapRegion()
358 for (MapRegion mapRegion : mapRegions)
360 accessions.add(mapRegion.getDb().getDbAccessionId());
368 public StructureMapping getSiftsStructureMapping(SequenceI seq,
369 String pdbFile, String chain) throws SiftsException
371 structId = (chain == null) ? pdbId : pdbId + "|" + chain;
372 System.out.println("Getting mapping for: " + pdbId + "|" + chain
373 + " : seq- " + seq.getName());
375 final StringBuilder mappingDetails = new StringBuilder(128);
376 PrintStream ps = new PrintStream(System.out)
379 public void print(String x)
381 mappingDetails.append(x);
385 public void println()
387 mappingDetails.append(NEWLINE);
390 int[][] mapping = getGreedyMapping(chain, seq, ps);
392 String mappingOutput = mappingDetails.toString();
393 StructureMapping siftsMapping = new StructureMapping(seq, pdbFile,
394 pdbId, chain, mapping,
400 public int[][] getGreedyMapping(String entityId, SequenceI seq,
401 java.io.PrintStream os)
402 throws SiftsException
405 System.out.println("Generating mappings for : " + entityId);
406 Entity entity = null;
407 entity = getEntityById(entityId);
408 String originalSeq = AlignSeq.extractGaps(
409 jalview.util.Comparison.GapChars,
410 seq.getSequenceAsString());
411 int mapping[][] = new int[originalSeq.length() + seq.getStart()][2];
412 DBRefEntryI sourceDBRef = seq.getSourceDBRef();
413 if (sourceDBRef == null)
415 sourceDBRef = getValidSourceDBRef(seq);
416 // TODO ensure sequence start/end is in the same coordinate system and
417 // consistent with the choosen sourceDBRef
420 // set sequence coordinate system - default value is UniProt
421 if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
423 seqCoordSys = CoordinateSys.PDB;
426 HashSet<String> dbRefAccessionIdsString = new HashSet<String>();
427 for (DBRefEntry dbref : seq.getDBRefs())
429 dbRefAccessionIdsString.add(dbref.getAccessionId().toLowerCase());
431 dbRefAccessionIdsString.add(sourceDBRef.getAccessionId().toLowerCase());
433 curDBRefAccessionIdsString = dbRefAccessionIdsString;
434 curSourceDBRef = sourceDBRef.getAccessionId();
436 // initialise all mapping positions to unassigned
437 for (int residuePos[] : mapping)
439 residuePos[PDB_RES_POS] = UNASSIGNED;
440 residuePos[PDB_ATOM_POS] = UNASSIGNED;
443 TreeMap<Integer, String> resNumMap = new TreeMap<Integer, String>();
444 List<Segment> segments = entity.getSegment();
445 for (Segment segment : segments)
447 segStartEnd = segment.getStart() + " - " + segment.getEnd();
448 System.out.println("Mappging segments : " + segment.getSegId() + "\\"
450 List<Residue> residues = segment.getListResidue().getResidue();
451 for (Residue residue : residues)
453 int currSeqIndex = UNASSIGNED;
454 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
455 CrossRefDb pdbRefDb = null;
456 for (CrossRefDb cRefDb : cRefDbs)
458 if (cRefDb.getDbSource().equalsIgnoreCase(DBRefSource.PDB))
462 if (cRefDb.getDbCoordSys()
463 .equalsIgnoreCase(seqCoordSys.getName())
464 && hasAccessionId(cRefDb.getDbAccessionId()))
466 String resNumIndexString = cRefDb.getDbResNum()
467 .equalsIgnoreCase("None") ? String.valueOf(UNASSIGNED)
468 : cRefDb.getDbResNum();
469 currSeqIndex = Integer.valueOf(resNumIndexString);
470 if (pdbRefDb != null)
472 break;// exit loop if pdb and uniprot are already found
476 if (currSeqIndex == UNASSIGNED)
480 if (currSeqIndex > seq.getStart() && currSeqIndex <= seq.getEnd())
485 resNum = (pdbRefDb == null) ? Integer.valueOf(residue
486 .getDbResNum()) : Integer.valueOf(pdbRefDb.getDbResNum());
487 } catch (NumberFormatException nfe)
489 resNum = (pdbRefDb == null) ? Integer.valueOf(residue
490 .getDbResNum()) : Integer.valueOf(pdbRefDb
491 .getDbResNum().split("[a-zA-Z]")[0]);
495 mapping[currSeqIndex][PDB_RES_POS] = Integer.valueOf(resNum);
496 } catch (ArrayIndexOutOfBoundsException e)
500 char resCharCode = ResidueProperties
501 .getSingleCharacterCode(residue.getDbResName());
502 resNumMap.put(currSeqIndex, String.valueOf(resCharCode));
508 populateAtomPositions(entityId, mapping);
509 } catch (Exception e)
513 padWithGaps(resNumMap);
515 int seqStart = UNASSIGNED;
516 int seqEnd = UNASSIGNED;
517 int pdbStart = UNASSIGNED;
518 int pdbEnd = UNASSIGNED;
519 boolean startDetected = false;
520 for (int[] x : mapping)
522 if (!startDetected && x[PDB_RES_POS] != UNASSIGNED)
525 startDetected = true;
526 // System.out.println("Seq start: "+ seqStart);
529 if (startDetected && x[PDB_RES_POS] != UNASSIGNED)
536 String matchedSeq = originalSeq;
537 if (seqStart != UNASSIGNED)
539 seqEnd = (seqEnd == UNASSIGNED) ? counter : seqEnd;
540 pdbStart = mapping[seqStart][PDB_RES_POS];
541 pdbEnd = mapping[seqEnd][PDB_RES_POS];
542 int orignalSeqStart = seq.getStart();
543 if (orignalSeqStart >= 1)
545 int subSeqStart = seqStart - orignalSeqStart;
546 int subSeqEnd = seqEnd - (orignalSeqStart - 1);
547 matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd);
551 StringBuilder targetStrucSeqs = new StringBuilder();
552 for (String res : resNumMap.values())
554 targetStrucSeqs.append(res);
559 MappingOutputPojo mop = new MappingOutputPojo();
560 mop.setSeqStart(seqStart);
561 mop.setSeqEnd(seqEnd);
562 mop.setSeqName(seq.getName());
563 mop.setSeqResidue(matchedSeq);
565 mop.setStrStart(pdbStart);
566 mop.setStrEnd(pdbEnd);
567 mop.setStrName(structId);
568 mop.setStrResidue(targetStrucSeqs.toString());
571 os.print(getMappingOutput(mop).toString());
576 private boolean hasAccessionId(String accession)
578 boolean isStrictMatch = true;
579 return isStrictMatch ? curSourceDBRef.equalsIgnoreCase(accession)
580 : curDBRefAccessionIdsString.contains(accession.toLowerCase());
584 public boolean isFoundInSiftsEntry(String accessionId)
586 return accessionId != null
587 && getAllMappingAccession().contains(accessionId);
591 * Pads missing positions with gaps
595 void padWithGaps(TreeMap<Integer, String> resNumMap)
597 if (resNumMap == null || resNumMap.isEmpty())
601 Integer[] keys = resNumMap.keySet().toArray(new Integer[0]);
603 int firstIndex = keys[0];
604 int lastIndex = keys[keys.length - 1];
605 System.out.println("Min value " + firstIndex);
606 System.out.println("Max value " + lastIndex);
607 for (int x = firstIndex; x <= lastIndex; x++)
609 if (!resNumMap.containsKey(x))
611 resNumMap.put(x, "-");
619 * Target chain to populate mapping of its atom positions.
621 * Two dimension array of residue index versus atom position
622 * @throws IllegalArgumentException
623 * Thrown if chainId or mapping is null
625 void populateAtomPositions(String chainId, int[][] mapping)
626 throws IllegalArgumentException
628 PDBChain chain = pdb.findChain(chainId);
629 if (chain == null || mapping == null)
631 throw new IllegalArgumentException(
632 "Chain id or mapping must not be null.");
634 for (int[] map : mapping)
636 if (map[PDB_RES_POS] != UNASSIGNED)
638 map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms);
645 * @param residueIndex
646 * The residue index used for the search
648 * A collection of Atom to search
649 * @return atom position for the given residue index
651 int getAtomIndex(int residueIndex, Collection<Atom> atoms)
655 throw new IllegalArgumentException(
656 "atoms collection must not be null!");
658 for (Atom atom : atoms)
660 if (atom.resNumber == residueIndex)
662 return atom.atomIndex;
669 public Entity getEntityById(String id) throws SiftsException
671 List<Entity> entities = siftsEntry.getEntity();
672 for (Entity entity : entities)
674 if (!entity.getEntityId().equalsIgnoreCase(id))
680 throw new SiftsException("Entity " + id + " not found");
684 public String[] getEntryDBs()
686 System.out.println("\nListing DB entries...");
687 List<String> availDbs = new ArrayList<String>();
688 List<Db> dbs = siftsEntry.getListDB().getDb();
691 availDbs.add(db.getDbSource());
692 System.out.println(db.getDbSource() + " | " + db.getDbCoordSys());
694 return availDbs.toArray(new String[0]);
698 public StringBuffer getMappingOutput(MappingOutputPojo mp)
699 throws SiftsException
701 String seqRes = mp.getSeqResidue();
702 String seqName = mp.getSeqName();
703 int sStart = mp.getSeqStart();
704 int sEnd = mp.getSeqEnd();
706 String strRes = mp.getStrResidue();
707 String strName = mp.getStrName();
708 int pdbStart = mp.getStrStart();
709 int pdbEnd = mp.getStrEnd();
711 String type = mp.getType();
713 int maxid = (seqName.length() >= strName.length()) ? seqName.length()
715 int len = 72 - maxid - 1;
717 int nochunks = ((seqRes.length()) / len)
718 + ((seqRes.length()) % len > 0 ? 1 : 0);
720 StringBuffer output = new StringBuffer();
721 output.append(NEWLINE);
722 output.append("Sequence ⟷ Structure mapping details").append(NEWLINE);
723 output.append("Method: SIFTS");
724 output.append(NEWLINE).append(NEWLINE);
726 output.append(new Format("%" + maxid + "s").form(seqName));
727 output.append(" : ");
728 output.append(String.valueOf(sStart));
729 output.append(" - ");
730 output.append(String.valueOf(sEnd));
731 output.append(" Maps to ");
732 output.append(NEWLINE);
733 output.append(new Format("%" + maxid + "s").form(structId));
734 output.append(" : ");
735 output.append(String.valueOf(pdbStart));
736 output.append(" - ");
737 output.append(String.valueOf(pdbEnd));
738 output.append(NEWLINE).append(NEWLINE);
740 int matchedSeqCount = 0;
741 for (int j = 0; j < nochunks; j++)
743 // Print the first aligned sequence
744 output.append(new Format("%" + (maxid) + "s").form(seqName)).append(
747 for (int i = 0; i < len; i++)
749 if ((i + (j * len)) < seqRes.length())
751 output.append(seqRes.charAt(i + (j * len)));
755 output.append(NEWLINE);
756 output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
758 // Print out the matching chars
759 for (int i = 0; i < len; i++)
763 if ((i + (j * len)) < seqRes.length())
765 if (seqRes.charAt(i + (j * len)) == strRes.charAt(i + (j * len))
766 && !jalview.util.Comparison.isGap(seqRes.charAt(i
772 else if (type.equals("pep"))
774 if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)),
775 strRes.charAt(i + (j * len))) > 0)
789 } catch (IndexOutOfBoundsException e)
794 // Now print the second aligned sequence
795 output = output.append(NEWLINE);
796 output = output.append(new Format("%" + (maxid) + "s").form(strName))
798 for (int i = 0; i < len; i++)
800 if ((i + (j * len)) < strRes.length())
802 output.append(strRes.charAt(i + (j * len)));
805 output.append(NEWLINE).append(NEWLINE);
807 float pid = (float) matchedSeqCount / seqRes.length() * 100;
810 throw new SiftsException("Low PID detected for SIFTs mapping...");
812 output.append("Length of alignment = " + seqRes.length())
814 output.append(new Format("Percentage ID = %2.2f").form(pid));
815 output.append(NEWLINE);
820 public int getEntityCount()
822 return siftsEntry.getEntity().size();
826 public String getDbAccessionId()
828 return siftsEntry.getDbAccessionId();
832 public String getDbCoordSys()
834 return siftsEntry.getDbCoordSys();
838 public String getDbEvidence()
840 return siftsEntry.getDbEvidence();
844 public String getDbSource()
846 return siftsEntry.getDbSource();
850 public String getDbVersion()
852 return siftsEntry.getDbVersion();