2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ws.sifts;
23 import jalview.analysis.AlignSeq;
24 import jalview.api.DBRefEntryI;
25 import jalview.api.SiftsClientI;
26 import jalview.datamodel.DBRefEntry;
27 import jalview.datamodel.DBRefSource;
28 import jalview.datamodel.SequenceI;
29 import jalview.schemes.ResidueProperties;
30 import jalview.structure.StructureMapping;
31 import jalview.util.Format;
32 import jalview.xml.binding.sifts.Entry;
33 import jalview.xml.binding.sifts.Entry.Entity;
34 import jalview.xml.binding.sifts.Entry.Entity.Segment;
35 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion;
36 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue;
37 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb;
38 import jalview.xml.binding.sifts.Entry.ListDB.Db;
41 import java.io.FileInputStream;
42 import java.io.FileNotFoundException;
43 import java.io.FileOutputStream;
44 import java.io.IOException;
45 import java.io.InputStream;
46 import java.io.PrintStream;
48 import java.net.URLConnection;
49 import java.util.ArrayList;
50 import java.util.Arrays;
51 import java.util.Collection;
52 import java.util.HashSet;
53 import java.util.List;
54 import java.util.TreeMap;
55 import java.util.zip.GZIPInputStream;
57 import javax.xml.bind.JAXBContext;
58 import javax.xml.bind.JAXBException;
59 import javax.xml.bind.Unmarshaller;
60 import javax.xml.stream.FactoryConfigurationError;
61 import javax.xml.stream.XMLInputFactory;
62 import javax.xml.stream.XMLStreamException;
63 import javax.xml.stream.XMLStreamReader;
66 import MCview.PDBChain;
67 import MCview.PDBfile;
69 public class SiftsClient implements SiftsClientI
71 private Entry siftsEntry;
77 private String structId;
79 private String segStartEnd;
81 private static final String UNIPROT_COORDINATE_SYS = "UniProt";
83 private static final String PDB_COORDINATE_SYS = "PDBresnum";
85 private String seqCoordSys = UNIPROT_COORDINATE_SYS;
87 private static final int BUFFER_SIZE = 4096;
89 public static final int UNASSIGNED = -1;
91 private static final int PDB_RES_POS = 0;
93 private static final int PDB_ATOM_POS = 1;
95 private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/";
97 public static final String DEFAULT_SIFTS_DOWNLOAD_DIR = System
98 .getProperty("user.home")
100 + ".sifts_downloads" + File.separatorChar;
102 public static final String SIFTS_DOWNLOAD_DIR = jalview.bin.Cache
103 .getDefault("sifts_download_dir", DEFAULT_SIFTS_DOWNLOAD_DIR);
105 private final static String NEWLINE = System.lineSeparator();
108 * Fetch SIFTs file for the given PDB Id and construct an instance of
113 public SiftsClient(PDBfile pdb)
119 File siftsFile = getSiftsFile(pdbId);
120 siftsEntry = parseSIFTs(siftsFile);
121 } catch (Exception e)
128 * Construct an instance of SiftsClient using the supplied SIFTs file -
129 * the SIFTs file should correspond to the given PDB Id
134 public SiftsClient(PDBfile pdb, File siftsFile)
140 siftsEntry = parseSIFTs(siftsFile);
141 } catch (Exception e)
149 * Parse the given SIFTs File and return a JAXB POJO of parsed data
152 * - the GZipped SIFTs XML file to parse
155 * if a problem occurs while parsing the SIFTs XML
157 private Entry parseSIFTs(File siftFile) throws Exception
161 System.out.println("File : " + siftFile.getAbsolutePath());
162 JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts");
163 InputStream in = new FileInputStream(siftFile);
164 GZIPInputStream gzis = new GZIPInputStream(in);
165 XMLStreamReader streamReader = XMLInputFactory.newInstance()
166 .createXMLStreamReader(gzis);
167 Unmarshaller um = jc.createUnmarshaller();
168 return (Entry) um.unmarshal(streamReader);
169 } catch (JAXBException e)
172 } catch (FileNotFoundException e)
175 } catch (XMLStreamException e)
178 } catch (FactoryConfigurationError e)
181 } catch (IOException e)
185 throw new Exception("Error parsing siftFile");
189 * Get a SIFTs XML file for a given PDB Id
192 * @return SIFTs XML file
194 public static File getSiftsFile(String pdbId)
196 File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase()
198 if (siftsFile.exists())
200 // TODO it may be worth performing an age check to determine if a
201 // new SIFTs file should be re-downloaded as SIFTs entries are usually
203 System.out.println(">>> SIFTS File already downloaded for " + pdbId);
206 siftsFile = downloadSiftsFile(pdbId.toLowerCase());
211 * Download a SIFTs XML file for a given PDB Id
214 * @return downloaded SIFTs XML file
216 public static File downloadSiftsFile(String pdbId)
218 String siftFile = pdbId + ".xml.gz";
219 String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile;
220 String downloadedSiftsFile = SIFTS_DOWNLOAD_DIR + siftFile;
221 File siftsDownloadDir = new File(SIFTS_DOWNLOAD_DIR);
222 if (!siftsDownloadDir.exists())
224 siftsDownloadDir.mkdirs();
228 System.out.println(">> Download ftp url : " + siftsFileFTPURL);
229 URL url = new URL(siftsFileFTPURL);
230 URLConnection conn = url.openConnection();
231 InputStream inputStream = conn.getInputStream();
232 FileOutputStream outputStream = new FileOutputStream(
233 downloadedSiftsFile);
234 byte[] buffer = new byte[BUFFER_SIZE];
236 while ((bytesRead = inputStream.read(buffer)) != -1)
238 outputStream.write(buffer, 0, bytesRead);
240 outputStream.close();
242 System.out.println(">>> File downloaded : " + downloadedSiftsFile);
243 } catch (IOException ex)
245 ex.printStackTrace();
247 return new File(downloadedSiftsFile);
251 * Delete the SIFTs file for the given PDB Id in the local SIFTs download
255 * @return true if the file was deleted or doesn't exist
257 public static boolean deleteSiftsFileByPDBId(String pdbId)
259 File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase()
261 if (siftsFile.exists())
263 return siftsFile.delete();
270 * Get a valid SIFTs DBRef for the given sequence in the current SIFTs entry
273 * - the target sequence for the operation
274 * @return a valid DBRefEntry that is SIFTs compatible
276 * if no valid source DBRefEntry was found for the given sequences
278 public DBRefEntryI getValidSourceDBRef(SequenceI seq)
279 throws SiftsException
281 DBRefEntryI sourceDBRef = null;
282 sourceDBRef = seq.getSourceDBRef();
283 if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
289 DBRefEntry[] dbRefs = seq.getDBRefs();
290 if (dbRefs == null || dbRefs.length < 1)
292 final SequenceI[] seqs = new SequenceI[] { seq };
293 new jalview.ws.DBRefFetcher(seqs, null, null, null, false)
295 dbRefs = seq.getDBRefs();
298 if (dbRefs == null || dbRefs.length < 1)
300 throw new SiftsException("Could not get source DB Ref");
303 for (DBRefEntryI dbRef : dbRefs)
305 if (dbRef == null || dbRef.getAccessionId() == null
306 || dbRef.getSource() == null)
310 if (isFoundInSiftsEntry(dbRef.getAccessionId())
311 && (dbRef.getSource().equalsIgnoreCase("uniprot") || dbRef
312 .getSource().equalsIgnoreCase("pdb")))
318 if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
322 throw new SiftsException("Could not get source DB Ref");
327 * Check that the DBRef Entry is properly populated and is available in the
328 * instantiated SIFTs Entry
331 * - DBRefEntry to validate
332 * @return true validation is successful otherwise false is returned.
334 private boolean isValidDBRefEntry(DBRefEntryI entry)
336 return entry != null && entry.getAccessionId() != null
337 && isFoundInSiftsEntry(entry.getAccessionId());
341 public HashSet<String> getAllMappingAccession()
343 HashSet<String> accessions = new HashSet<String>();
344 List<Entity> entities = siftsEntry.getEntity();
345 for (Entity entity : entities)
347 List<Segment> segments = entity.getSegment();
348 for (Segment segment : segments)
350 List<MapRegion> mapRegions = segment.getListMapRegion()
352 for (MapRegion mapRegion : mapRegions)
354 accessions.add(mapRegion.getDb().getDbAccessionId());
362 public StructureMapping getSiftsStructureMapping(SequenceI seq,
363 String pdbFile, String chain) throws SiftsException
365 structId = (chain == null) ? pdbId : pdbId + "|" + chain;
366 System.out.println("Getting mapping for: " + pdbId + "|" + chain
367 + " : seq- " + seq.getName());
369 final StringBuilder mappingDetails = new StringBuilder(128);
370 PrintStream ps = new PrintStream(System.out)
373 public void print(String x)
375 mappingDetails.append(x);
379 public void println()
381 mappingDetails.append(NEWLINE);
384 int[][] mapping = getGreedyMapping(chain, seq, ps);
386 String mappingOutput = mappingDetails.toString();
387 StructureMapping siftsMapping = new StructureMapping(seq, pdbFile,
388 pdbId, chain, mapping,
394 public int[][] getGreedyMapping(String entityId, SequenceI seq,
395 java.io.PrintStream os)
396 throws SiftsException
399 System.out.println("Generating mappings for : " + entityId);
400 Entity entity = null;
401 entity = getEntityById(entityId);
402 String originalSeq = AlignSeq.extractGaps(
403 jalview.util.Comparison.GapChars,
404 seq.getSequenceAsString());
405 int mapping[][] = new int[originalSeq.length() + seq.getStart()][2];
406 DBRefEntryI sourceDBRef = seq.getSourceDBRef();
407 if (sourceDBRef == null)
409 sourceDBRef = getValidSourceDBRef(seq);
410 // TODO ensure sequence start/end is in the same coordinate system and
411 // consistent with the chosen sourceDBRef
414 // set sequence coordinate system - default value is UniProt
415 if (sourceDBRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
417 seqCoordSys = PDB_COORDINATE_SYS;
420 ArrayList<String> dbRefAccessionIdsString = new ArrayList<String>();
421 for (DBRefEntry dbref : seq.getDBRefs())
423 dbRefAccessionIdsString.add(dbref.getAccessionId());
426 // initialise all mapping positions to unassigned
427 for (int residuePos[] : mapping)
429 residuePos[PDB_RES_POS] = UNASSIGNED;
430 residuePos[PDB_ATOM_POS] = UNASSIGNED;
433 TreeMap<Integer, String> resNumMap = new TreeMap<Integer, String>();
434 List<Segment> segments = entity.getSegment();
435 for (Segment segment : segments)
437 segStartEnd = segment.getStart() + " - " + segment.getEnd();
438 System.out.println("Mappging segments : " + segment.getSegId() + "\\"
440 List<Residue> residues = segment.getListResidue().getResidue();
441 for (Residue residue : residues)
443 int currSeqIndex = UNASSIGNED;
444 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
445 for (CrossRefDb cRefDb : cRefDbs)
447 if (cRefDb.getDbCoordSys().equalsIgnoreCase(seqCoordSys)
448 && dbRefAccessionIdsString.contains(cRefDb
449 .getDbAccessionId()))
451 String resNumIndexString = cRefDb.getDbResNum()
452 .equalsIgnoreCase("None") ? String.valueOf(UNASSIGNED)
453 : cRefDb.getDbResNum();
454 currSeqIndex = Integer.valueOf(resNumIndexString);
458 if (currSeqIndex == UNASSIGNED)
462 if (currSeqIndex > seq.getStart() && currSeqIndex <= seq.getEnd())
464 int resNum = Integer.valueOf(residue.getDbResNum());
465 mapping[currSeqIndex][PDB_RES_POS] = Integer.valueOf(resNum);
466 char resCharCode = ResidueProperties
467 .getSingleCharacterCode(residue.getDbResName());
468 resNumMap.put(currSeqIndex, String.valueOf(resCharCode));
474 populateAtomPositions(entityId, mapping);
475 } catch (Exception e)
479 padWithGaps(resNumMap);
485 boolean startDetected = false;
486 for (int[] x : mapping)
488 if (!startDetected && x[PDB_RES_POS] != UNASSIGNED)
491 startDetected = true;
492 // System.out.println("Seq start: "+ seqStart);
495 if (startDetected && x[PDB_RES_POS] != UNASSIGNED)
502 String matchedSeq = originalSeq;
503 if (seqStart != UNASSIGNED)
505 seqEnd = (seqEnd == UNASSIGNED) ? counter : seqEnd;
506 pdbStart = mapping[seqStart][PDB_RES_POS];
507 pdbEnd = mapping[seqEnd][PDB_RES_POS];
508 int orignalSeqStart = seq.getStart();
509 if (orignalSeqStart >= 1)
511 int subSeqStart = seqStart - orignalSeqStart;
512 int subSeqEnd = seqEnd - (orignalSeqStart - 1);
513 matchedSeq = originalSeq.substring(subSeqStart, subSeqEnd);
517 StringBuilder targetStrucSeqs = new StringBuilder();
518 for (String res : resNumMap.values())
520 targetStrucSeqs.append(res);
527 MappingOutputPojo mop = new MappingOutputPojo();
528 mop.setSeqStart(seqStart);
529 mop.setSeqEnd(seqEnd);
530 mop.setSeqName(seq.getName());
531 mop.setSeqResidue(matchedSeq);
533 mop.setStrStart(pdbStart);
534 mop.setStrEnd(pdbEnd);
535 mop.setStrName(structId);
536 mop.setStrResidue(targetStrucSeqs.toString());
539 os.print(getMappingOutput(mop).toString());
541 } catch (Exception ex)
543 ex.printStackTrace();
549 public boolean isFoundInSiftsEntry(String accessionId)
551 return accessionId != null
552 && getAllMappingAccession().contains(accessionId);
556 * Pads missing positions with gaps
560 void padWithGaps(TreeMap<Integer, String> resNumMap)
562 Integer[] keys = resNumMap.keySet().toArray(new Integer[0]);
564 int firstIndex = keys[0];
565 int lastIndex = keys[keys.length - 1];
566 System.out.println("Min value " + firstIndex);
567 System.out.println("Max value " + lastIndex);
568 for (int x = firstIndex; x <= lastIndex; x++)
570 if (!resNumMap.containsKey(x))
572 resNumMap.put(x, "-");
580 * Target chain to populate mapping of its atom positions.
582 * Two dimension array of residue index versus atom position
583 * @throws IllegalArgumentException
584 * Thrown if chainId or mapping is null
586 void populateAtomPositions(String chainId, int[][] mapping)
587 throws IllegalArgumentException
589 PDBChain chain = pdb.findChain(chainId);
590 if (chain == null || mapping == null)
592 throw new IllegalArgumentException(
593 "Chain id or mapping must not be null.");
595 for (int[] map : mapping)
597 if (map[PDB_RES_POS] != UNASSIGNED)
599 map[PDB_ATOM_POS] = getAtomIndex(map[PDB_RES_POS], chain.atoms);
606 * @param residueIndex
607 * The residue index used for the search
609 * A collection of Atom to search
610 * @return atom position for the given residue index
612 int getAtomIndex(int residueIndex, Collection<Atom> atoms)
616 throw new IllegalArgumentException(
617 "atoms collection must not be null!");
619 for (Atom atom : atoms)
621 if (atom.resNumber == residueIndex)
623 return atom.atomIndex;
630 public Entity getEntityById(String id) throws SiftsException
632 List<Entity> entities = siftsEntry.getEntity();
633 for (Entity entity : entities)
635 if (!entity.getEntityId().equalsIgnoreCase(id))
641 throw new SiftsException("Entity " + id + " not found");
645 public String[] getEntryDBs()
647 System.out.println("\nListing DB entries...");
648 List<String> availDbs = new ArrayList<String>();
649 List<Db> dbs = siftsEntry.getListDB().getDb();
652 availDbs.add(db.getDbSource());
653 System.out.println(db.getDbSource() + " | " + db.getDbCoordSys());
655 return availDbs.toArray(new String[0]);
659 public StringBuffer getMappingOutput(MappingOutputPojo mp)
660 throws SiftsException
662 String seqRes = mp.getSeqResidue();
663 String seqName = mp.getSeqName();
664 int sStart = mp.getSeqStart();
665 int sEnd = mp.getSeqEnd();
667 String strRes = mp.getStrResidue();
668 String strName = mp.getStrName();
669 int pdbStart = mp.getStrStart();
670 int pdbEnd = mp.getStrEnd();
672 String type = mp.getType();
674 int maxid = (seqName.length() >= strName.length()) ? seqName.length()
676 int len = 72 - maxid - 1;
678 int nochunks = ((seqRes.length()) / len)
679 + ((seqRes.length()) % len > 0 ? 1 : 0);
681 StringBuffer output = new StringBuffer();
682 output.append(NEWLINE);
683 output.append("Sequence ⟷ Structure mapping details").append(NEWLINE);
684 output.append("Method: SIFTS");
685 output.append(NEWLINE).append(NEWLINE);
687 output.append(new Format("%" + maxid + "s").form(seqName));
688 output.append(" : ");
689 output.append(String.valueOf(sStart));
690 output.append(" - ");
691 output.append(String.valueOf(sEnd));
692 output.append(" Maps to ");
693 output.append(NEWLINE);
694 output.append(new Format("%" + maxid + "s").form(structId));
695 output.append(" : ");
696 output.append(String.valueOf(pdbStart));
697 output.append(" - ");
698 output.append(String.valueOf(pdbEnd));
699 output.append(NEWLINE).append(NEWLINE);
701 int matchedSeqCount = 0;
702 for (int j = 0; j < nochunks; j++)
704 // Print the first aligned sequence
705 output.append(new Format("%" + (maxid) + "s").form(seqName)).append(
708 for (int i = 0; i < len; i++)
710 if ((i + (j * len)) < seqRes.length())
712 output.append(seqRes.charAt(i + (j * len)));
716 output.append(NEWLINE);
717 output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
719 // Print out the matching chars
720 for (int i = 0; i < len; i++)
724 if ((i + (j * len)) < seqRes.length())
726 if (seqRes.charAt(i + (j * len)) == strRes.charAt(i + (j * len))
727 && !jalview.util.Comparison.isGap(seqRes.charAt(i
733 else if (type.equals("pep"))
735 if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)),
736 strRes.charAt(i + (j * len))) > 0)
750 } catch (IndexOutOfBoundsException e)
755 // Now print the second aligned sequence
756 output = output.append(NEWLINE);
757 output = output.append(new Format("%" + (maxid) + "s").form(strName))
759 for (int i = 0; i < len; i++)
761 if ((i + (j * len)) < strRes.length())
763 output.append(strRes.charAt(i + (j * len)));
766 output.append(NEWLINE).append(NEWLINE);
768 float pid = (float) matchedSeqCount / seqRes.length() * 100;
769 output.append("Length of alignment = " + seqRes.length())
771 output.append(new Format("Percentage ID = %2.2f").form(pid));
772 output.append(NEWLINE);
777 public int getEntityCount()
779 return siftsEntry.getEntity().size();
783 public String getDbAccessionId()
785 return siftsEntry.getDbAccessionId();
789 public String getDbCoordSys()
791 return siftsEntry.getDbCoordSys();
795 public String getDbEvidence()
797 return siftsEntry.getDbEvidence();
801 public String getDbSource()
803 return siftsEntry.getDbSource();
807 public String getDbVersion()
809 return siftsEntry.getDbVersion();