/* * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.ws.sifts; import jalview.analysis.AlignSeq; import jalview.api.DBRefEntryI; import jalview.api.SiftsClientI; import jalview.datamodel.DBRefEntry; import jalview.datamodel.SequenceI; import jalview.schemes.ResidueProperties; import jalview.structure.StructureMapping; import jalview.util.Format; import jalview.xml.binding.sifts.Entry; import jalview.xml.binding.sifts.Entry.Entity; import jalview.xml.binding.sifts.Entry.Entity.Segment; import jalview.xml.binding.sifts.Entry.Entity.Segment.ListMapRegion.MapRegion; import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue; import jalview.xml.binding.sifts.Entry.Entity.Segment.ListResidue.Residue.CrossRefDb; import jalview.xml.binding.sifts.Entry.ListDB.Db; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.zip.GZIPInputStream; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import javax.xml.stream.FactoryConfigurationError; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; public class SiftsClient implements SiftsClientI { private Entry siftsEntry; private String pdbId; private String structId; private String segStartEnd; private static final int BUFFER_SIZE = 4096; private static final String SIFTS_FTP_BASE_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/"; public static final String DEFAULT_SIFTS_DOWNLOAD_DIR = System .getProperty("user.home") + File.separatorChar + ".sifts_downloads" + File.separatorChar; public static final String SIFTS_DOWNLOAD_DIR = jalview.bin.Cache .getDefault("sifts_download_dir", DEFAULT_SIFTS_DOWNLOAD_DIR); private final static String NEWLINE = System.lineSeparator(); /** * Fetch SIFTs file for the given PDB Id and construct an instance of * SiftsClient * * @param pdbId */ public SiftsClient(String pdbId) { this.pdbId = pdbId; try { File siftsFile = getSiftsFile(pdbId); siftsEntry = parseSIFTs(siftsFile); } catch (Exception e) { e.printStackTrace(); } } /** * Construct an instance of SiftsClient using the supplied SIFTs file - * the SIFTs file should correspond to the given PDB Id * * @param pdbId * @param siftsFile */ public SiftsClient(String pdbId, File siftsFile) { this.pdbId = pdbId; try { siftsEntry = parseSIFTs(siftsFile); } catch (Exception e) { e.printStackTrace(); } } /** * Parse the given SIFTs File and return a JAXB POJO of parsed data * * @param siftFile * - the GZipped SIFTs XML file to parse * @return * @throws Exception * if a problem occurs while parsing the SIFTs XML */ private Entry parseSIFTs(File siftFile) throws Exception { try { System.out.println("File : " + siftFile.getAbsolutePath()); JAXBContext jc = JAXBContext.newInstance("jalview.xml.binding.sifts"); InputStream in = new FileInputStream(siftFile); GZIPInputStream gzis = new GZIPInputStream(in); XMLStreamReader streamReader = XMLInputFactory.newInstance() .createXMLStreamReader(gzis); Unmarshaller um = jc.createUnmarshaller(); return (Entry) um.unmarshal(streamReader); } catch (JAXBException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (XMLStreamException e) { e.printStackTrace(); } catch (FactoryConfigurationError e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } throw new Exception("Error parsing siftFile"); } /** * Get a SIFTs XML file for a given PDB Id * * @param pdbId * @return SIFTs XML file */ public static File getSiftsFile(String pdbId) { File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase() + ".xml.gz"); if (siftsFile.exists()) { // TODO it may be worth performing a timestamp age check to determine if a // new SIFTs file should be re-downloaded as SIFTs entries are usually // updated weekly System.out.println(">>> SIFTS File already downloaded for " + pdbId); return siftsFile; } siftsFile = downloadSiftsFile(pdbId.toLowerCase()); return siftsFile; } /** * Download a SIFTs XML file for a given PDB Id * * @param pdbId * @return downloaded SIFTs XML file */ public static File downloadSiftsFile(String pdbId) { String siftFile = pdbId + ".xml.gz"; String siftsFileFTPURL = SIFTS_FTP_BASE_URL + siftFile; String downloadedSiftsFile = SIFTS_DOWNLOAD_DIR + siftFile; File siftsDownloadDir = new File(SIFTS_DOWNLOAD_DIR); if (!siftsDownloadDir.exists()) { siftsDownloadDir.mkdirs(); } try { System.out.println(">> Download ftp url : " + siftsFileFTPURL); URL url = new URL(siftsFileFTPURL); URLConnection conn = url.openConnection(); InputStream inputStream = conn.getInputStream(); FileOutputStream outputStream = new FileOutputStream( downloadedSiftsFile); byte[] buffer = new byte[BUFFER_SIZE]; int bytesRead = -1; while ((bytesRead = inputStream.read(buffer)) != -1) { outputStream.write(buffer, 0, bytesRead); } outputStream.close(); inputStream.close(); System.out.println(">>> File downloaded : " + downloadedSiftsFile); } catch (IOException ex) { ex.printStackTrace(); } return new File(downloadedSiftsFile); } /** * Delete the SIFTs file for the given PDB Id in the local SIFTs download * directory * * @param pdbId * @return true if the file was deleted or doesn't exist */ public static boolean deleteSiftsFileByPDBId(String pdbId) { File siftsFile = new File(SIFTS_DOWNLOAD_DIR + pdbId.toLowerCase() + ".xml.gz"); if (siftsFile.exists()) { return siftsFile.delete(); } return true; } /** * Get a valid SIFTs DBRef for the given sequence current SIFTs entry * * @param seq * - the target sequence for the operation * @return a valid DBRefEntry that is SIFTs compatible * @throws Exception * if no valid source DBRefEntry was found for the given sequences */ public DBRefEntryI getValidSourceDBRef(SequenceI seq) throws SiftsException { DBRefEntryI sourceDBRef = null; sourceDBRef = seq.getSourceDBRef(); if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) { return sourceDBRef; } else { DBRefEntry[] dbRefs = seq.getDBRefs(); if (dbRefs == null || dbRefs.length < 1) { final SequenceI[] seqs = new SequenceI[] { seq }; new jalview.ws.DBRefFetcher(seqs, null, null, null, false) .fetchDBRefs(true); dbRefs = seq.getDBRefs(); } if (dbRefs == null || dbRefs.length < 1) { throw new SiftsException("Could not get source DB Ref"); } for (DBRefEntryI dbRef : dbRefs) { if (dbRef == null || dbRef.getAccessionId() == null || dbRef.getSource() == null) { continue; } if (isFoundInSiftsEntry(dbRef.getAccessionId()) && (dbRef.getSource().equalsIgnoreCase("uniprot") || dbRef .getSource().equalsIgnoreCase("pdb"))) { return dbRef; } } } if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) { return sourceDBRef; } throw new SiftsException("Could not get source DB Ref"); } /** * Check that the DBRef Entry is properly populated and is available in the * instantiated SIFTs Entry * * @param entry * - DBRefEntry to validate * @return true validation is successful otherwise false is returned. */ private boolean isValidDBRefEntry(DBRefEntryI entry) { return entry != null && entry.getAccessionId() != null && isFoundInSiftsEntry(entry.getAccessionId()); // & entry.getStartRes() > 0; } @Override public HashSet getAllMappingAccession() { HashSet accessions = new HashSet(); List entities = siftsEntry.getEntity(); for (Entity entity : entities) { List segments = entity.getSegment(); for (Segment segment : segments) { List mapRegions = segment.getListMapRegion() .getMapRegion(); for (MapRegion mapRegion : mapRegions) { accessions.add(mapRegion.getDb().getDbAccessionId()); } } } return accessions; } @Override public StructureMapping getSiftsStructureMapping(SequenceI seq, String pdbFile, String chain) throws SiftsException { structId = (chain == null) ? pdbId : pdbId + "|" + chain; System.out.println("Getting mapping for: " + pdbId + "|" + chain + " : seq- " + seq.getName()); final StringBuilder mappingDetails = new StringBuilder(128); PrintStream ps = new PrintStream(System.out) { @Override public void print(String x) { mappingDetails.append(x); } @Override public void println() { mappingDetails.append(NEWLINE); } }; int[][] mapping = getGreedyMapping(chain, seq, ps); String mappingOutput = mappingDetails.toString(); return new StructureMapping(seq, pdbFile, pdbId, chain, mapping, mappingOutput); } @Override public int[][] getGreedyMapping(String entityId, SequenceI seq, java.io.PrintStream os) throws SiftsException { int matchedResStart = -1; int matchedResEnd = -1; int counter = 0; int pdbStart = -1; int pdbEnd = -1; int sStart = -1; int sEnd = -1; boolean startDetected = false; System.out.println("Generating mappings for : " + entityId); Entity entity = null; entity = getEntityById(entityId); String seqStr = AlignSeq.extractGaps(jalview.util.Comparison.GapChars, seq.getSequenceAsString()); int mapping[][] = new int[seqStr.length() + seq.getStart()][2]; DBRefEntryI sourceDBRef = seq.getSourceDBRef(); if (sourceDBRef == null) { sourceDBRef = getValidSourceDBRef(seq); // TODO update sequence start/end with sourceDBRef start/end // seq.setStart(sourceDBRef.getStartRes()); // seq.setEnd(sourceDBRef.getEndRes()); } String crossRefAccessionId = sourceDBRef.getAccessionId(); int count = 0; for (int residue[] : mapping) { residue[1] = count++; residue[0] = -1; } LinkedHashMap resNumMap = new LinkedHashMap(); List segments = entity.getSegment(); for (Segment segment : segments) { segStartEnd = segment.getStart() + " - " + segment.getEnd(); System.out.println("Mappging segments : " + segment.getSegId() + "\\" + segStartEnd); List residues = segment.getListResidue().getResidue(); for (Residue residue : residues) { int refDbResNum = -1; List cRefDbs = residue.getCrossRefDb(); for (CrossRefDb cRefDb : cRefDbs) { if (cRefDb.getDbAccessionId().equalsIgnoreCase( crossRefAccessionId)) { refDbResNum = Integer.valueOf(cRefDb.getDbResNum()); } } if (refDbResNum == -1) { continue; } int loopCount = 0; for (int[] x : mapping) { if (loopCount > seq.getStart() && x[1] == refDbResNum) { int resNum = Integer.valueOf(residue.getDbResNum()); x[0] = resNum; char resCharCode = ResidueProperties .getSingleCharacterCode(residue.getDbResName()); resNumMap.put(resNum, String.valueOf(resCharCode)); } ++loopCount; } } } for (int[] x : mapping) { if (!startDetected && x[0] > -1) { matchedResStart = counter; // System.out.println(matchedResStart); startDetected = true; } if (startDetected && x[0] == -1) { matchedResEnd = counter; } ++counter; } String matchedSeqStr = seqStr; if (matchedResStart != -1) { matchedResEnd = (matchedResEnd == -1) ? counter : matchedResEnd; pdbStart = mapping[matchedResStart][0]; pdbEnd = mapping[matchedResEnd - 1][0]; sStart = mapping[matchedResStart][1]; sEnd = mapping[matchedResEnd - 1][1]; int seqStart = seq.getStart(); if (seqStart > 1) { matchedResStart = matchedResStart - seqStart; matchedResEnd = matchedResEnd - seqStart; } else { --matchedResStart; --matchedResEnd; } matchedSeqStr = seqStr.substring(matchedResStart, matchedResEnd); } StringBuilder targetStrucSeqs = new StringBuilder(); for (String res : resNumMap.values()) { targetStrucSeqs.append(res); } try { if (os != null) { MappingOutputPojo mop = new MappingOutputPojo(); mop.setSeqStart(sStart); mop.setSeqEnd(sEnd); mop.setSeqName(seq.getName()); mop.setSeqResidue(matchedSeqStr); mop.setStrStart(pdbStart); mop.setStrEnd(pdbEnd); mop.setStrName(structId); mop.setStrResidue(targetStrucSeqs.toString()); mop.setType("pep"); os.print(getMappingOutput(mop).toString()); } } catch (Exception ex) { ex.printStackTrace(); } return mapping; } @Override public boolean isFoundInSiftsEntry(String accessionId) { return accessionId != null && getAllMappingAccession().contains(accessionId); } @Override public Entity getEntityById(String id) throws SiftsException { List entities = siftsEntry.getEntity(); for (Entity entity : entities) { if (!entity.getEntityId().equalsIgnoreCase(id)) { continue; } return entity; } throw new SiftsException("Entity " + id + " not found"); } @Override public String[] getEntryDBs() { System.out.println("\nListing DB entries..."); List availDbs = new ArrayList(); List dbs = siftsEntry.getListDB().getDb(); for (Db db : dbs) { availDbs.add(db.getDbSource()); System.out.println(db.getDbSource() + " | " + db.getDbCoordSys()); } return availDbs.toArray(new String[0]); } @Override public StringBuffer getMappingOutput(MappingOutputPojo mp) { String seqRes = mp.getSeqResidue(); String seqName = mp.getSeqName(); int sStart = mp.getSeqStart(); int sEnd = mp.getSeqEnd(); String strRes = mp.getStrResidue(); String strName = mp.getStrName(); int pdbStart = mp.getStrStart(); int pdbEnd = mp.getStrEnd(); String type = mp.getType(); int maxid = (seqName.length() >= strName.length()) ? seqName.length() : strName.length(); int len = 72 - maxid - 1; // int nochunks = 2;// mp.getWrapHeight(); int nochunks = ((seqRes.length()) / len) + ((seqRes.length()) % len > 0 ? 1 : 0); // output mappings StringBuffer output = new StringBuffer(); output.append(NEWLINE); output.append("Sequence ⟷ Structure mapping details:"); output.append(NEWLINE).append(NEWLINE); output.append(new Format("%" + maxid + "s").form(seqName)); output.append(" : "); output.append(String.valueOf(sStart)); output.append(" - "); output.append(String.valueOf(sEnd)); output.append(" Maps to "); output.append(NEWLINE); output.append(new Format("%" + maxid + "s").form(structId)); output.append(" : "); output.append(String.valueOf(pdbStart)); output.append(" - "); output.append(String.valueOf(pdbEnd)); output.append(NEWLINE).append(NEWLINE); float pid = 0; for (int j = 0; j < nochunks; j++) { // Print the first aligned sequence output.append(new Format("%" + (maxid) + "s").form(seqName)).append( " "); for (int i = 0; i < len; i++) { if ((i + (j * len)) < seqRes.length()) { output.append(seqRes.charAt(i + (j * len))); } } output.append(NEWLINE); output.append(new Format("%" + (maxid) + "s").form(" ")).append(" "); // Print out the matching chars for (int i = 0; i < len; i++) { if ((i + (j * len)) < seqRes.length()) { if (seqRes.charAt(i + (j * len)) == strRes.charAt(i + (j * len)) && !jalview.util.Comparison.isGap(seqRes.charAt(i + (j * len)))) { pid++; output.append("|"); } else if (type.equals("pep")) { if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)), strRes.charAt(i + (j * len))) > 0) { output.append("."); } else { output.append(" "); } } else { output.append(" "); } } } // Now print the second aligned sequence output = output.append(NEWLINE); output = output.append(new Format("%" + (maxid) + "s").form(strName)) .append(" "); for (int i = 0; i < len; i++) { if ((i + (j * len)) < strRes.length()) { output.append(strRes.charAt(i + (j * len))); } } output.append(NEWLINE).append(NEWLINE); } pid = pid / (seqRes.length()) * 100; output.append("Length of alignment = " + seqRes.length()) .append(NEWLINE); output.append(new Format("Percentage ID = %2.2f").form(pid)); output.append(NEWLINE); output.append("Mapping method: SIFTS").append(NEWLINE); return output; } @Override public int getEntityCount() { return siftsEntry.getEntity().size(); } @Override public String getDbAccessionId() { return siftsEntry.getDbAccessionId(); } @Override public String getDbCoordSys() { return siftsEntry.getDbCoordSys(); } @Override public String getDbEvidence() { return siftsEntry.getDbEvidence(); } @Override public String getDbSource() { return siftsEntry.getDbSource(); } @Override public String getDbVersion() { return siftsEntry.getDbVersion(); } }