2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.analysis.AlignSeq;
24 import jalview.api.DBRefEntryI;
25 import jalview.api.SiftsClientI;
26 import jalview.datamodel.DBRefEntry;
27 import jalview.datamodel.SequenceI;
28 import jalview.schemes.ResidueProperties;
29 import jalview.structure.StructureMapping;
30 import jalview.util.Format;
31 import jalview.xml.binding.sifts.Entry;
32 import jalview.xml.binding.sifts.Entry.Entity;
33 import jalview.xml.binding.sifts.Entry.Entity.Segment;
34 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion;
35 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue;
36 import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb;
37 import jalview.xml.binding.sifts.Entry.EntryDetail;
38 import jalview.xml.binding.sifts.Entry.ListDB.Db;
41 import java.io.FileInputStream;
42 import java.io.FileNotFoundException;
43 import java.io.FileOutputStream;
44 import java.io.IOException;
45 import java.io.InputStream;
46 import java.io.PrintStream;
48 import java.net.URLConnection;
49 import java.util.HashMap;
50 import java.util.HashSet;
51 import java.util.List;
52 import java.util.zip.GZIPInputStream;
54 import javax.xml.bind.JAXBContext;
55 import javax.xml.bind.JAXBException;
56 import javax.xml.bind.Unmarshaller;
57 import javax.xml.stream.FactoryConfigurationError;
58 import javax.xml.stream.XMLInputFactory;
59 import javax.xml.stream.XMLStreamException;
60 import javax.xml.stream.XMLStreamReader;
62 public class SiftsClient implements SiftsClientI
/**
 * JAXB object model of the parsed SIFTs XML entry. Remains null when the
 * SIFTs file could not be fetched or parsed by a constructor.
 */
private Entry siftsEntry;

/**
 * PDB Id this client was constructed for; read by getSiftsStructureMapping.
 */
private String pdbId;

/** Size, in bytes, of the buffer used when copying a downloaded SIFTs file. */
private static final int BUFFER_SIZE = 4096;

/** Base URL that the per-entry "<pdbid>.xml.gz" file name is appended to. */
private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/";

/**
 * Default download directory: ".sifts_downloads" inside the user's home
 * directory, terminated with a path separator so that a file name can be
 * appended directly.
 */
public static final String DEFAULT_SIFTS_DOWNLOAD_DIR = System
        .getProperty("user.home")
        // the separator between the home directory and the folder name is
        // required, otherwise the folder name is glued onto the home path
        + File.separatorChar
        + ".sifts_downloads" + File.separatorChar;

/**
 * Download directory in use: the value returned by jalview.bin.Cache for the
 * "sifts_download_dir" key, with DEFAULT_SIFTS_DOWNLOAD_DIR supplied as the
 * default.
 */
public static final String SIFTS_DOWNLOAD_DIR = jalview.bin.Cache
        .getDefault("sifts_download_dir", DEFAULT_SIFTS_DOWNLOAD_DIR);

/** Platform line separator used when assembling mapping output. */
private final static String NEWLINE = System.lineSeparator();

/**
 * Fetch the SIFTs file for the given PDB Id and construct an instance of
 * SiftsClient from it.
 * 
 * Any failure while fetching or parsing is reported on the standard error
 * stream and leaves the client without a parsed entry.
 * 
 * @param pdbId
 *          - the PDB Id whose SIFTs file should be fetched and parsed
 */
public SiftsClient(String pdbId)
{
  // remember the id: getSiftsStructureMapping reports and passes it on
  this.pdbId = pdbId;
  try
  {
    File siftsFile = getSiftsFile(pdbId);
    siftsEntry = parseSIFTs(siftsFile);
  } catch (Exception e)
  {
    e.printStackTrace();
  }
}

/**
 * Construct an instance of SiftsClient using the supplied SIFTs file - the
 * SIFTs file should correspond to the given PDB Id.
 * 
 * Any failure while parsing is reported on the standard error stream and
 * leaves the client without a parsed entry.
 * 
 * @param pdbId
 *          - the PDB Id the supplied file belongs to
 * @param siftsFile
 *          - the GZipped SIFTs XML file to parse
 */
public SiftsClient(String pdbId, File siftsFile)
{
  // previously the pdbId argument was accepted but never stored
  this.pdbId = pdbId;
  try
  {
    siftsEntry = parseSIFTs(siftsFile);
  } catch (Exception e)
  {
    e.printStackTrace();
  }
}
122 * Parse the given SIFTs File and return a JAXB POJO of parsed data
125 * - the GZipped SIFTs XML file to parse
128 * if a problem occurs while parsing the SIFTs XML
130 private Entry parseSIFTs(File siftFile) throws Exception
134 System.out.println("File : " + siftFile.getAbsolutePath());
135 JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts");
136 InputStream in = new FileInputStream(siftFile);
137 GZIPInputStream gzis = new GZIPInputStream(in);
138 XMLStreamReader streamReader = XMLInputFactory.newInstance()
139 .createXMLStreamReader(gzis);
140 Unmarshaller um = jc.createUnmarshaller();
141 return (Entry) um.unmarshal(streamReader);
142 } catch (JAXBException e)
145 } catch (FileNotFoundException e)
148 } catch (XMLStreamException e)
151 } catch (FactoryConfigurationError e)
154 } catch (IOException e)
158 throw new Exception("Error parsing siftFile");
162 * Get a SIFTs XML file for a given PDB Id
165 * @return SIFTs XML file
167 public static File getSiftsFile(String pdbId)
169 File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase()
171 if (siftsFile.exists())
173 // TODO it may be worth performing a timestamp age check to determine if a
174 // new SIFTs file should be re-downloaded as SIFTs entries are usually
176 System.out.println(">>> SIFTS File already downloaded for " + pdbId);
179 siftsFile = downloadSiftsFile(pdbId.toLowerCase());
184 * Download a SIFTs XML file for a given PDB Id
187 * @return downloaded SIFTs XML file
189 public static File downloadSiftsFile(String pdbId)
191 String siftFile = pdbId + ".xml.gz";
192 String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile;
193 String downloadedSiftsFile = SIFTS_DOWNLOAD_DIR + siftFile;
194 File siftsDownloadDir = new File(SIFTS_DOWNLOAD_DIR);
195 if (!siftsDownloadDir.exists())
197 siftsDownloadDir.mkdirs();
201 System.out.println(">> Download ftp url : " + siftsFileFTPURL);
202 URL url = new URL(siftsFileFTPURL);
203 URLConnection conn = url.openConnection();
204 InputStream inputStream = conn.getInputStream();
205 FileOutputStream outputStream = new FileOutputStream(
206 downloadedSiftsFile);
207 byte[] buffer = new byte[BUFFER_SIZE];
209 while ((bytesRead = inputStream.read(buffer)) != -1)
211 outputStream.write(buffer, 0, bytesRead);
213 outputStream.close();
215 System.out.println(">>> File downloaded : " + downloadedSiftsFile);
216 } catch (IOException ex)
218 ex.printStackTrace();
220 return new File(downloadedSiftsFile);
224 * Delete the SIFTs file for the given PDB Id in the local SIFTs download
228 * @return true if the file was deleted or doesn't exist
230 public static boolean deleteSiftsFileByPDBId(String pdbId)
232 File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase()
234 if (siftsFile.exists())
236 return siftsFile.delete();
243 * Get a valid SIFTs DBRef for the given sequence from the current SIFTs entry
246 * - the target sequence for the operation
247 * @return a valid DBRefEntry that is SIFTs compatible
249 * if no valid source DBRefEntry was found for the given sequences
// Chooses the DBRef used to look the given sequence up in the parsed SIFTs
// entry. The sequence's own source DBRef is tried first; otherwise the
// sequence's DBRef list is examined for an entry whose accession occurs in
// the SIFTs entry and whose source is "uniprot" or "pdb" (case-insensitive).
// Throws Exception("Could not get source DB Ref") when the sequence has no
// DBRefs, and again at the end when nothing acceptable was selected.
// NOTE(review): several short lines of this method (braces, return/continue
// statements, the method invoked on the DBRefFetcher, the statement executed
// for a matching DBRef) are not visible here - confirm the exact control flow
// against the complete file.
251 public DBRefEntryI getValidSourceDBRef(SequenceI seq) throws Exception
253 DBRefEntryI sourceDBRef = null;
254 sourceDBRef = seq.getSourceDBRef();
// first choice: the sequence's own source DBRef, provided it validates
255 if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
261 DBRefEntry[] dbRefs = seq.getDBRefs();
// the sequence carries no DBRefs: a DBRefFetcher is created for it and the
// DBRef list is read again afterwards
262 if (dbRefs == null || dbRefs.length < 1)
264 final SequenceI[] seqs = new SequenceI[] { seq };
265 new jalview.ws.DBRefFetcher(seqs, null, null, null, false)
267 dbRefs = seq.getDBRefs();
// still no DBRefs after the fetch attempt: nothing to choose from
270 if (dbRefs == null || dbRefs.length < 1)
272 throw new Exception("Could not get source DB Ref");
275 for (DBRefEntryI dbRef : dbRefs)
// guard against entries without an accession id or source - presumably these
// are skipped; the guarded statement is not visible
277 if (dbRef == null || dbRef.getAccessionId() == null
278 || dbRef.getSource() == null)
// candidate test: accession known to the SIFTs entry AND source is uniprot/pdb
282 if (isFoundInSiftsEntry(dbRef.getAccessionId())
283 && (dbRef.getSource().equalsIgnoreCase("uniprot") || dbRef
284 .getSource().equalsIgnoreCase("pdb")))
// final validation of whatever was selected above
290 if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
294 throw new Exception("Could not get source DB Ref");
299 * Check that the DBRef Entry is properly populated and is available in the
300 * instantiated SIFTs Entry
303 * - DBRefEntry to validate
304 * @return true validation is successful otherwise false is returned.
306 private boolean isValidDBRefEntry(DBRefEntryI entry)
308 return entry != null && entry.getAccessionId() != null
309 && isFoundInSiftsEntry(entry.getAccessionId());
310 // & entry.getStartRes() > 0;
314 public HashSet<String> getAllMappingAccession()
316 HashSet<String> accessions = new HashSet<String>();
317 List<Entity> entities = siftsEntry.getEntity();
318 for (Entity entity : entities)
320 List<Segment> segments = entity.getSegment();
321 for (Segment segment : segments)
323 List<MapRegion> mapRegions = segment.getListMapRegion()
325 for (MapRegion mapRegion : mapRegions)
327 accessions.add(mapRegion.getDb().getDbAccessionId());
// Builds a per-residue mapping table for the given sequence against one
// entity of the parsed SIFTs entry. The table has one row per ungapped
// sequence position and two columns; column 1 is pre-filled with consecutive
// numbers derived from the sequence start, and residues of the entity are
// matched to rows through the cross-reference whose accession equals the
// sequence's source DBRef accession. A textual rendering is produced with
// getMappingOutput and written to the supplied PrintStream.
// NOTE(review): many short lines of this method are not visible here (braces,
// the try header, the definition of "value", what is written into column 0,
// the return statement) - confirm details against the complete file.
336 public int[][] getGreedyMapping(String entityId, SequenceI seq,
337 java.io.PrintStream os)
340 System.out.println("Generating mappings for : " + entityId);
341 Entity entity = null;
// throws if no entity with this id exists in the SIFTs entry
342 entity = getEntityById(entityId);
// work on the sequence with gap characters removed
343 String seqStr = AlignSeq.extractGaps(jalview.util.Comparison.GapChars,
344 seq.getSequenceAsString());
345 // StringBuilder mappedStrucSeq = new StringBuilder(seqStr.length());
346 String[] mappedStrucSeq = new String[seqStr.length()];
// one row per ungapped residue, two columns per row
347 int mapping[][] = new int[seqStr.length()][2];
348 DBRefEntryI sourceDBRef = seq.getSourceDBRef();
// fall back to searching the sequence's DBRefs when no source DBRef is set
349 if (sourceDBRef == null)
351 sourceDBRef = getValidSourceDBRef(seq);
352 // TODO if sourceDBRef is null at this point then throw an Exception
354 // TODO update sequence start/end with sourceDBRef start/end
355 // seq.setStart(sourceDBRef.getStartRes());
356 // seq.setEnd(sourceDBRef.getEndRes());
359 String crossRefAccessionId = sourceDBRef.getAccessionId();
// column 1 of successive rows receives seq.getStart() - 1, seq.getStart(), ...
360 int start = seq.getStart() - 1;
361 for (int residue[] : mapping)
363 residue[1] = start++;
366 HashMap<Integer, String> resNumMap = new HashMap<Integer, String>();
367 List<Segment> segments = entity.getSegment();
368 for (Segment segment : segments)
370 System.out.println("Mappging segments : " + segment.getSegId() + "\\"
371 + segment.getStart() + "-" + segment.getEnd());
372 List<Residue> residues = segment.getListResidue().getResidue();
373 for (Residue residue : residues)
// -1 marks "no cross-reference to the source accession found for this residue"
375 int refDbResNum = -1;
376 List<CrossRefDb> cRefDbs = residue.getCrossRefDb();
377 for (CrossRefDb cRefDb : cRefDbs)
// accession comparison is case-insensitive here
379 if (cRefDb.getDbAccessionId().equalsIgnoreCase(
380 crossRefAccessionId))
// Integer.valueOf throws NumberFormatException if the value is not numeric
382 refDbResNum = Integer.valueOf(cRefDb.getDbResNum());
385 if (refDbResNum == -1)
// find the mapping row(s) whose column 1 equals the cross-referenced number
389 for (int[] x : mapping)
391 if (x[1] == refDbResNum)
393 int resNum = Integer.valueOf(residue.getDbResNum());
// NOTE(review): "value" is defined on a line that is not visible here
396 resNumMap.put(resNum, value);
402 //Generate visual mapping output
403 // StringBuilder strucSeq = new StringBuilder();
404 // for(int[] x : mapping){
405 // if(mapping[0] == 0){
406 // strucSeq.append(b)
// NOTE(review): index 1 is out of bounds when the ungapped sequence has fewer
// than two residues, since the array length is seqStr.length()
409 mappedStrucSeq[1] = "x";
412 System.out.println(">>>> seq: " + seqStr + "\nlength "
// NOTE(review): mappedStrucSeq is a String[]; toString() on an array yields
// the default Object identity string, not the array's contents
414 System.out.println(">>>> pdb: " + mappedStrucSeq.toString()
415 + "\nlength " + mappedStrucSeq.toString().length());
417 String printedMapping = getMappingOutput(mappedStrucSeq.toString(),
418 seqStr, "seqAccession", "strucAccession", "pep", 3)
// presumably guarded by a null check on os - the guard is not visible
422 os.print(printedMapping);
424 System.out.println();
// any failure is reported on the standard error stream
425 } catch (Exception ex)
427 ex.printStackTrace();
433 public boolean isFoundInSiftsEntry(String accessionId)
435 return accessionId != null
436 && getAllMappingAccession().contains(accessionId);
// Produces a StructureMapping for the given sequence and chain by delegating
// to getGreedyMapping and wrapping the resulting int[][] table.
// NOTE(review): the anonymous-class braces, the body of the catch block and
// the final constructor argument are on lines that are not visible here.
440 public StructureMapping getSiftsStructureMapping(SequenceI seq,
441 String pdbFile, String chain)
443 System.out.println("Getting mapping for: " + pdbId + "|" + chain
444 + " : seq- " + seq.getName());
// text handed to print(String)/println() of the stream below is accumulated
// in this builder
446 final StringBuilder mappingDetails = new StringBuilder(128);
447 PrintStream ps = new PrintStream(System.out)
450 public void print(String x)
452 mappingDetails.append(x);
456 public void println()
458 mappingDetails.append(NEWLINE);
// stays null if getGreedyMapping throws
461 int[][] mapping = null;
// the chain id is passed where getGreedyMapping expects an entity id
464 mapping = getGreedyMapping(chain, seq, ps);
465 } catch (Exception e)
// the captured text is currently not used: mappingOutput is set to null
469 // String mappingOutput = mappingDetails.toString();
470 String mappingOutput = null;
471 return new StructureMapping(seq, pdbFile, pdbId, chain, mapping,
476 public Entity getEntityById(String id) throws Exception
478 List<Entity> entities = siftsEntry.getEntity();
479 for (Entity entity : entities)
481 if (!entity.getEntityId().equalsIgnoreCase(id))
487 throw new Exception("Entity " + id + " not found");
// Prints the source and coordinate system of the Db entries listed in the
// parsed SIFTs entry to standard output, one "source | coordSys" line each.
// NOTE(review): the loop header that declares "db" and the return statement
// are on lines that are not visible here - confirm what is returned.
491 public String[] getEntryDBs()
493 System.out.println("\nListing DB entries...");
494 List<Db> dbs = siftsEntry.getListDB().getDb();
497 System.out.println(db.getDbSource() + " | " + db.getDbCoordSys());
// Prints one "Entry Details: ..." line to standard output for every
// EntryDetail of the parsed SIFTs entry, containing its content, db source
// and property. Nothing is returned.
// NOTE(review): the last operand of the printed expression is on a line that
// is not visible here.
503 public void getEntryDetails()
505 List<EntryDetail> eds = siftsEntry.getEntryDetail();
506 for (EntryDetail ed : eds)
508 System.out.println("Entry Details: " + ed.getContent() + " "
509 + ed.getDbSource() + " " + ed.getProperty() + " "
// Renders two aligned strings as text in "nochunks" chunks. Each chunk has
// three rows: astr1 labelled with s1id, a row derived from comparing the two
// strings position by position, and astr2 labelled with s2id. The result is
// also printed to standard output, followed by the value of pid.
// NOTE(review): the declaration and updates of "pid", the characters appended
// to the comparison row and the return statement are on lines that are not
// visible here - confirm against the complete file.
515 public StringBuffer getMappingOutput(String astr1, String astr2, String s1id,
516 String s2id, String type, int nochunks)
// label column width is taken from s1id only; s2id is formatted to the same width
518 int maxid = s1id.length();
// residues per chunk: 72 columns minus the label and one separating space
519 int len = 72 - maxid - 1;
520 StringBuffer output = new StringBuffer();
523 for (int j = 0; j < nochunks; j++)
525 // Print the first aligned sequence
526 output.append(new Format("%" + (maxid) + "s").form(s1id)).append(" ");
528 for (int i = 0; i < len; i++)
// positions beyond the end of astr1 are not appended
530 if ((i + (j * len)) < astr1.length())
532 output.append(astr1.charAt(i + (j * len)));
536 output.append(NEWLINE);
// blank label so the comparison row lines up under the sequence
537 output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
539 // Print out the matching chars
540 for (int i = 0; i < len; i++)
// NOTE(review): only astr1's length is checked in the visible lines; astr2 is
// indexed at the same position and would throw if it is shorter - confirm
542 if ((i + (j * len)) < astr1.length())
// identical, non-gap characters
544 if (astr1.charAt(i + (j * len)) == astr2.charAt(i + (j * len))
545 && !jalview.util.Comparison.isGap(astr1.charAt(i
// for type "pep": differing characters with a positive PAM250 score
551 else if (type.equals("pep"))
553 if (ResidueProperties.getPAM250(astr1.charAt(i + (j * len)),
554 astr2.charAt(i + (j * len))) > 0)
569 // Now print the second aligned sequence
570 output = output.append(NEWLINE);
571 output = output.append(new Format("%" + (maxid) + "s").form(s2id))
573 for (int i = 0; i < len; i++)
575 if ((i + (j * len)) < astr2.length())
577 output.append(astr2.charAt(i + (j * len)));
580 output.append(NEWLINE).append(NEWLINE);
// pid is scaled by the length of astr1 and multiplied by 100
// NOTE(review): the type of pid is not visible; with an integer type an empty
// astr1 would divide by zero - confirm
582 pid = pid / (astr1.length()) * 100;
583 System.out.println(output);
584 System.out.println(pid);
585 // TODO return output & pid
590 public int getEntityCount()
592 return siftsEntry.getEntity().size();
596 public String getDbAccessionId()
598 return siftsEntry.getDbAccessionId();
602 public String getDbCoordSys()
604 return siftsEntry.getDbCoordSys();
608 public String getDbEvidence()
610 return siftsEntry.getDbEvidence();
614 public String getDbSource()
616 return siftsEntry.getDbSource();
620 public String getDbVersion()
622 return siftsEntry.getDbVersion();